[llvm-commits] [llvm] r161152 - in /llvm/trunk: include/llvm/Target/TargetInstrInfo.h lib/CodeGen/PeepholeOptimizer.cpp lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.h test/CodeGen/X86/2012-05-19-avx2-store.ll test/CodeGen/X86/break-sse-dep.ll test/CodeGen/X86/fold-load.ll test/CodeGen/X86/fold-pcmpeqd-1.ll test/CodeGen/X86/sse-minmax.ll test/CodeGen/X86/vec_compare.ll

Michael Liao michael.liao at intel.com
Thu Aug 2 00:31:24 PDT 2012


Some of these cases conflict with Bruno Cardoso Lopes' earlier effort to remove
partial-register-update stalls.

For example, sqrtsd with a memory operand is one such instruction: in SSE it
updates only part of the destination register. It should be selected when the
code is optimized for size; otherwise the movsd + sqrtsd sequence is preferred
over sqrtsd with a memory operand.
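
For illustration (not from the patch itself), here are the two forms in AT&T
syntax, assuming the double* argument arrives in %rdi:

    # Folded form, as in the updated break-sse-dep.ll CHECK: smaller code,
    # but sqrtsd writes only the low 64 bits of %xmm0, so it carries a
    # false dependence on whatever was previously in %xmm0.
    sqrtsd  (%rdi), %xmm0

    # Unfolded form: movsd from memory writes the full register (the upper
    # 64 bits are zeroed), breaking the false dependence, so it is the
    # better choice when optimizing for speed.
    movsd   (%rdi), %xmm0
    sqrtsd  %xmm0, %xmm0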

Yours
- Michael

On Thu, 2012-08-02 at 00:56 +0000, Manman Ren wrote:
> Author: mren
> Date: Wed Aug  1 19:56:42 2012
> New Revision: 161152
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=161152&view=rev
> Log:
> X86 Peephole: fold loads to the source register operand if possible.
> 
> Machine CSE and other optimizations can remove instructions so folding
> is possible at peephole while not possible at ISel.
> 
> This patch is a rework of r160919 and was tested on clang self-host on my local
> machine.
> 
> rdar://10554090 and rdar://11873276
> 
> Modified:
>     llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>     llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
>     llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
>     llvm/trunk/lib/Target/X86/X86InstrInfo.h
>     llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll
>     llvm/trunk/test/CodeGen/X86/break-sse-dep.ll
>     llvm/trunk/test/CodeGen/X86/fold-load.ll
>     llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll
>     llvm/trunk/test/CodeGen/X86/sse-minmax.ll
>     llvm/trunk/test/CodeGen/X86/vec_compare.ll
> 
> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Wed Aug  1 19:56:42 2012
> @@ -14,6 +14,7 @@
>  #ifndef LLVM_TARGET_TARGETINSTRINFO_H
>  #define LLVM_TARGET_TARGETINSTRINFO_H
>  
> +#include "llvm/ADT/SmallSet.h"
>  #include "llvm/MC/MCInstrInfo.h"
>  #include "llvm/CodeGen/DFAPacketizer.h"
>  #include "llvm/CodeGen/MachineFunction.h"
> @@ -693,6 +694,16 @@
>      return false;
>    }
>  
> +  /// optimizeLoadInstr - Try to remove the load by folding it to a register
> +  /// operand at the use. We fold the load instructions if and only if the
> +  /// def and use are in the same BB.
> +  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
> +                        const MachineRegisterInfo *MRI,
> +                        unsigned &FoldAsLoadDefReg,
> +                        MachineInstr *&DefMI) const {
> +    return 0;
> +  }
> +
>    /// FoldImmediate - 'Reg' is known to be defined by a move immediate
>    /// instruction, try to fold the immediate into the use instruction.
>    virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
> 
> Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)
> +++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Wed Aug  1 19:56:42 2012
> @@ -78,6 +78,7 @@
>  STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
>  STATISTIC(NumCmps,       "Number of compares eliminated");
>  STATISTIC(NumImmFold,    "Number of move immediate folded");
> +STATISTIC(NumLoadFold,   "Number of loads folded");
>  
>  namespace {
>    class PeepholeOptimizer : public MachineFunctionPass {
> @@ -114,6 +115,7 @@
>      bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
>                         SmallSet<unsigned, 4> &ImmDefRegs,
>                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
> +    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
>    };
>  }
>  
> @@ -384,6 +386,29 @@
>    return false;
>  }
>  
> +/// isLoadFoldable - Check whether MI is a candidate for folding into a later
> +/// instruction. We only fold loads to virtual registers and the virtual
> +/// register defined has a single use.
> +bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
> +                                       unsigned &FoldAsLoadDefReg) {
> +  if (MI->canFoldAsLoad()) {
> +    const MCInstrDesc &MCID = MI->getDesc();
> +    if (MCID.getNumDefs() == 1) {
> +      unsigned Reg = MI->getOperand(0).getReg();
> +      // To reduce compilation time, we check MRI->hasOneUse when inserting
> +      // loads. It should be checked when processing uses of the load, since
> +      // uses can be removed during peephole.
> +      if (!MI->getOperand(0).getSubReg() &&
> +          TargetRegisterInfo::isVirtualRegister(Reg) &&
> +          MRI->hasOneUse(Reg)) {
> +        FoldAsLoadDefReg = Reg;
> +        return true;
> +      }
> +    }
> +  }
> +  return false;
> +}
> +
>  bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
>                                          SmallSet<unsigned, 4> &ImmDefRegs,
>                                   DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
> @@ -441,6 +466,7 @@
>    SmallPtrSet<MachineInstr*, 8> LocalMIs;
>    SmallSet<unsigned, 4> ImmDefRegs;
>    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
> +  unsigned FoldAsLoadDefReg;
>    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
>      MachineBasicBlock *MBB = &*I;
>  
> @@ -448,6 +474,7 @@
>      LocalMIs.clear();
>      ImmDefRegs.clear();
>      ImmDefMIs.clear();
> +    FoldAsLoadDefReg = 0;
>  
>      bool First = true;
>      MachineBasicBlock::iterator PMII;
> @@ -456,12 +483,17 @@
>        MachineInstr *MI = &*MII;
>        LocalMIs.insert(MI);
>  
> +      // If there exists an instruction which belongs to the following
> +      // categories, we will discard the load candidate.
>        if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
>            MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
>            MI->hasUnmodeledSideEffects()) {
> +        FoldAsLoadDefReg = 0;
>          ++MII;
>          continue;
>        }
> +      if (MI->mayStore() || MI->isCall())
> +        FoldAsLoadDefReg = 0;
>  
>        if (MI->isBitcast()) {
>          if (optimizeBitcastInstr(MI, MBB)) {
> @@ -489,6 +521,31 @@
>            Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
>        }
>  
> +      // Check whether MI is a load candidate for folding into a later
> +      // instruction. If MI is not a candidate, check whether we can fold an
> +      // earlier load into MI.
> +      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
> +        // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
> +        // can enable folding by converting SUB to CMP.
> +        MachineInstr *DefMI = 0;
> +        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
> +                                                      FoldAsLoadDefReg, DefMI);
> +        if (FoldMI) {
> +          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
> +          LocalMIs.erase(MI);
> +          LocalMIs.erase(DefMI);
> +          LocalMIs.insert(FoldMI);
> +          MI->eraseFromParent();
> +          DefMI->eraseFromParent();
> +          ++NumLoadFold;
> +
> +          // MI is replaced with FoldMI.
> +          Changed = true;
> +          PMII = FoldMI;
> +          MII = llvm::next(PMII);
> +          continue;
> +        }
> +      }
>        First = false;
>        PMII = MII;
>        ++MII;
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Aug  1 19:56:42 2012
> @@ -3323,6 +3323,81 @@
>    return true;
>  }
>  
> +/// optimizeLoadInstr - Try to remove the load by folding it to a register
> +/// operand at the use. We fold the load instructions if load defines a virtual
> +/// register, the virtual register is used once in the same BB, and the
> +/// instructions in-between do not load or store, and have no side effects.
> +MachineInstr* X86InstrInfo::
> +optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
> +                  unsigned &FoldAsLoadDefReg,
> +                  MachineInstr *&DefMI) const {
> +  if (FoldAsLoadDefReg == 0)
> +    return 0;
> +  // To be conservative, if there exists another load, clear the load candidate.
> +  if (MI->mayLoad()) {
> +    FoldAsLoadDefReg = 0;
> +    return 0;
> +  }
> +
> +  // Check whether we can move DefMI here.
> +  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
> +  assert(DefMI);
> +  bool SawStore = false;
> +  if (!DefMI->isSafeToMove(this, 0, SawStore))
> +    return 0;
> +
> +  // We try to commute MI if possible.
> +  unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
> +  for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
> +    // Collect information about virtual register operands of MI.
> +    unsigned SrcOperandId = 0;
> +    bool FoundSrcOperand = false;
> +    for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
> +      MachineOperand &MO = MI->getOperand(i);
> +      if (!MO.isReg())
> +        continue;
> +      unsigned Reg = MO.getReg();
> +      if (Reg != FoldAsLoadDefReg)
> +        continue;
> +      // Do not fold if we have a subreg use or a def or multiple uses.
> +      if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
> +        return 0;
> +
> +      SrcOperandId = i;
> +      FoundSrcOperand = true;
> +    }
> +    if (!FoundSrcOperand) return 0;
> +
> +    // Check whether we can fold the def into SrcOperandId.
> +    SmallVector<unsigned, 8> Ops;
> +    Ops.push_back(SrcOperandId);
> +    MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
> +    if (FoldMI) {
> +      FoldAsLoadDefReg = 0;
> +      return FoldMI;
> +    }
> +
> +    if (Idx == 1) {
> +      // MI was changed but it didn't help, commute it back!
> +      commuteInstruction(MI, false);
> +      return 0;
> +    }
> +
> +    // Check whether we can commute MI and enable folding.
> +    if (MI->isCommutable()) {
> +      MachineInstr *NewMI = commuteInstruction(MI, false);
> +      // Unable to commute.
> +      if (!NewMI) return 0;
> +      if (NewMI != MI) {
> +        // New instruction. It doesn't need to be kept.
> +        NewMI->eraseFromParent();
> +        return 0;
> +      }
> +    }
> +  }
> +  return 0;
> +}
> +
>  /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
>  /// instruction with two undef reads of the register being defined.  This is
>  /// used for mapping:
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Aug  1 19:56:42 2012
> @@ -387,6 +387,14 @@
>                                      unsigned SrcReg2, int CmpMask, int CmpValue,
>                                      const MachineRegisterInfo *MRI) const;
>  
> +  /// optimizeLoadInstr - Try to remove the load by folding it to a register
> +  /// operand at the use. We fold the load instructions if and only if the
> +  /// def and use are in the same BB.
> +  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
> +                        const MachineRegisterInfo *MRI,
> +                        unsigned &FoldAsLoadDefReg,
> +                        MachineInstr *&DefMI) const;
> +
>  private:
>    MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
>                                                MachineFunction::iterator &MFI,
> 
> Modified: llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll Wed Aug  1 19:56:42 2012
> @@ -3,8 +3,7 @@
>  define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
>  entry:
>    ; CHECK: vmovaps
> -  ; CHECK: vmovaps
> -  ; CHECK: vinsertf128
> +  ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
>    ; CHECK: vmovups
>    %A = load <4 x i32>* %Ap
>    %B = load <4 x i32>* %Bp
> 
> Modified: llvm/trunk/test/CodeGen/X86/break-sse-dep.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/break-sse-dep.ll?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/break-sse-dep.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/break-sse-dep.ll Wed Aug  1 19:56:42 2012
> @@ -34,8 +34,7 @@
>  define double @squirt(double* %x) nounwind {
>  entry:
>  ; CHECK: squirt:
> -; CHECK: movsd ([[A0]]), %xmm0
> -; CHECK: sqrtsd %xmm0, %xmm0
> +; CHECK: sqrtsd ([[A0]]), %xmm0
>    %z = load double* %x
>    %t = call double @llvm.sqrt.f64(double %z)
>    ret double %t
> 
> Modified: llvm/trunk/test/CodeGen/X86/fold-load.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-load.ll?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fold-load.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fold-load.ll Wed Aug  1 19:56:42 2012
> @@ -45,3 +45,29 @@
>  
>  }
>  
> +; rdar://10554090
> +; xor in exit block will be CSE'ed and load will be folded to xor in entry.
> +define i1 @test3(i32* %P, i32* %Q) nounwind {
> +; CHECK: test3:
> +; CHECK: movl 8(%esp), %eax
> +; CHECK: xorl (%eax),
> +; CHECK: j
> +; CHECK-NOT: xor
> +entry:
> +  %0 = load i32* %P, align 4
> +  %1 = load i32* %Q, align 4
> +  %2 = xor i32 %0, %1
> +  %3 = and i32 %2, 65535
> +  %4 = icmp eq i32 %3, 0
> +  br i1 %4, label %exit, label %land.end
> +
> +exit:
> +  %shr.i.i19 = xor i32 %1, %0
> +  %5 = and i32 %shr.i.i19, 2147418112
> +  %6 = icmp eq i32 %5, 0
> +  br label %land.end
> +
> +land.end:
> +  %7 = phi i1 [ %6, %exit ], [ false, %entry ]
> +  ret i1 %7
> +}
> 
> Modified: llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll Wed Aug  1 19:56:42 2012
> @@ -1,11 +1,14 @@
> -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
> -; RUN: grep pcmpeqd %t | count 1
> -; RUN: grep xor %t | count 1
> -; RUN: not grep LCP %t
> +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
>  
>  define <2 x double> @foo() nounwind {
>    ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>)
> +; CHECK: foo:
> +; CHECK: pcmpeqd %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
> +; CHECK-NEXT: ret
>  }
>  define <2 x double> @bar() nounwind {
>    ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>)
> +; CHECK: bar:
> +; CHECK: xorps %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
> +; CHECK-NEXT: ret
>  }
> 
> Modified: llvm/trunk/test/CodeGen/X86/sse-minmax.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-minmax.ll?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse-minmax.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse-minmax.ll Wed Aug  1 19:56:42 2012
> @@ -1,6 +1,6 @@
> -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false  | FileCheck %s
> -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math  | FileCheck -check-prefix=UNSAFE %s
> -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math  | FileCheck -check-prefix=FINITE %s
> +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false  | FileCheck %s
> +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math  | FileCheck -check-prefix=UNSAFE %s
> +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math  | FileCheck -check-prefix=FINITE %s
>  
>  ; Some of these patterns can be matched as SSE min or max. Some of
>  ; then can be matched provided that the operands are swapped.
> @@ -137,16 +137,13 @@
>  }
>  
>  ; CHECK:      ogt_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: maxsd %xmm1, %xmm0
> +; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
>  ; CHECK-NEXT: ret
>  ; UNSAFE:      ogt_x:
> -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      ogt_x:
> -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; FINITE-NEXT: maxsd %xmm1, %xmm0
> +; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @ogt_x(double %x) nounwind {
>    %c = fcmp ogt double %x, 0.000000e+00
> @@ -155,16 +152,13 @@
>  }
>  
>  ; CHECK:      olt_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: minsd %xmm1, %xmm0
> +; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
>  ; CHECK-NEXT: ret
>  ; UNSAFE:      olt_x:
> -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      olt_x:
> -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; FINITE-NEXT: minsd %xmm1, %xmm0
> +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @olt_x(double %x) nounwind {
>    %c = fcmp olt double %x, 0.000000e+00
> @@ -217,12 +211,10 @@
>  ; CHECK:      oge_x:
>  ; CHECK:      ucomisd %xmm1, %xmm0
>  ; UNSAFE:      oge_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      oge_x:
> -; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; FINITE-NEXT: maxsd   %xmm1, %xmm0
> +; FINITE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @oge_x(double %x) nounwind {
>    %c = fcmp oge double %x, 0.000000e+00
> @@ -233,12 +225,10 @@
>  ; CHECK:      ole_x:
>  ; CHECK:      ucomisd %xmm0, %xmm1
>  ; UNSAFE:      ole_x:
> -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      ole_x:
> -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; FINITE-NEXT: minsd %xmm1, %xmm0
> +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @ole_x(double %x) nounwind {
>    %c = fcmp ole double %x, 0.000000e+00
> @@ -411,12 +401,10 @@
>  ; CHECK:      ugt_x:
>  ; CHECK:      ucomisd %xmm0, %xmm1
>  ; UNSAFE:      ugt_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      ugt_x:
> -; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; FINITE-NEXT: maxsd   %xmm1, %xmm0
> +; FINITE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @ugt_x(double %x) nounwind {
>    %c = fcmp ugt double %x, 0.000000e+00
> @@ -427,12 +415,10 @@
>  ; CHECK:      ult_x:
>  ; CHECK:      ucomisd %xmm1, %xmm0
>  ; UNSAFE:      ult_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd   %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd   LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      ult_x:
> -; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; FINITE-NEXT: minsd   %xmm1, %xmm0
> +; FINITE-NEXT: minsd   LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @ult_x(double %x) nounwind {
>    %c = fcmp ult double %x, 0.000000e+00
> @@ -482,12 +468,10 @@
>  ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
>  ; CHECK-NEXT: ret
>  ; UNSAFE:      uge_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd  LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      uge_x:
> -; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; FINITE-NEXT: maxsd  %xmm1, %xmm0
> +; FINITE-NEXT: maxsd  LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @uge_x(double %x) nounwind {
>    %c = fcmp uge double %x, 0.000000e+00
> @@ -501,12 +485,10 @@
>  ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
>  ; CHECK-NEXT: ret
>  ; UNSAFE:      ule_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd  %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd  LCP{{.*}}(%rip), %xmm0
>  ; UNSAFE-NEXT: ret
>  ; FINITE:      ule_x:
> -; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; FINITE-NEXT: minsd  %xmm1, %xmm0
> +; FINITE-NEXT: minsd  LCP{{.*}}(%rip), %xmm0
>  ; FINITE-NEXT: ret
>  define double @ule_x(double %x) nounwind {
>    %c = fcmp ule double %x, 0.000000e+00
> @@ -515,8 +497,7 @@
>  }
>  
>  ; CHECK:      uge_inverse_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: minsd %xmm1, %xmm0
> +; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
>  ; CHECK-NEXT: ret
>  ; UNSAFE:      uge_inverse_x:
>  ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> @@ -535,8 +516,7 @@
>  }
>  
>  ; CHECK:      ule_inverse_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: maxsd %xmm1, %xmm0
> +; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
>  ; CHECK-NEXT: ret
>  ; UNSAFE:      ule_inverse_x:
>  ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> 
> Modified: llvm/trunk/test/CodeGen/X86/vec_compare.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_compare.ll?rev=161152&r1=161151&r2=161152&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_compare.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_compare.ll Wed Aug  1 19:56:42 2012
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
> +; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s
>  
> 
>  define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
> @@ -14,8 +14,8 @@
>  define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
>  ; CHECK: test2:
>  ; CHECK: pcmp
> -; CHECK: pcmp
> -; CHECK: pxor
> +; CHECK: pxor LCP
> +; CHECK: movdqa
>  ; CHECK: ret
>  	%C = icmp sge <4 x i32> %A, %B
>          %D = sext <4 x i1> %C to <4 x i32>
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits