[llvm-branch-commits] [llvm-branch] r117425 [7/9] - in /llvm/branches/wendling/eh: ./ autoconf/ autoconf/m4/ bindings/ada/ bindings/ocaml/llvm/ bindings/ocaml/transforms/scalar/ cmake/ cmake/modules/ docs/ docs/CommandGuide/ docs/tutorial/ examples/ examples/BrainF/ examples/ExceptionDemo/ examples/Fibonacci/ examples/Kaleidoscope/Chapter7/ examples/ModuleMaker/ include/llvm-c/ include/llvm-c/Transforms/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/Assembly/ include/llvm/Bitcode/ include/llvm/CodeGen/ i...
Bill Wendling
isanbard at gmail.com
Tue Oct 26 17:48:11 PDT 2010
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrInfo.td Tue Oct 26 19:48:03 2010
@@ -1,10 +1,10 @@
-//===----------------------------------------------------------------------===//
-//
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instruction set, defining the instructions, and
@@ -46,7 +46,7 @@
[SDTCisInt<0>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
-def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -64,6 +64,12 @@
SDTCisVT<1, iPTR>,
SDTCisVT<2, iPTR>]>;
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
def SDTX86Void : SDTypeProfile<0, 0, []>;
@@ -74,8 +80,6 @@
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
-
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -111,30 +115,30 @@
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ SDNPMayLoad, SDNPMemOperand]>;
def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
@@ -143,13 +147,16 @@
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
SDT_X86VASTART_SAVE_XMM_REGS,
[SDNPHasChain, SDNPVariadic]>;
-
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
[SDNPHasChain, SDNPOutFlag]>;
def X86callseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
@@ -169,13 +176,11 @@
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress",
- SDT_X86SegmentBaseAddress, []>;
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
-def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
@@ -185,7 +190,7 @@
[SDNPCommutative]>;
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
[SDNPCommutative]>;
-
+
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
@@ -197,9 +202,9 @@
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
-def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[]>;
@@ -261,6 +266,14 @@
let ParserMatchClass = X86MemAsmOperand;
}
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
let ParserMatchClass = X86AbsMemAsmOperand,
PrintMethod = "print_pcrel_imm" in {
@@ -332,18 +345,50 @@
let ParserMatchClass = ImmSExti32i8AsmOperand;
}
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64-bits but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
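
As a side note, the "only N bits are significant" comments above amount to sign-extension round-trips, mirroring the C++ predicate bodies used elsewhere in this file. A minimal standalone sketch (helper names are hypothetical, not part of this patch):

    #include <cstdint>

    // A 64-bit immediate is encodable in a sign-extended 32-bit (resp. 8-bit)
    // field iff sign-extending its low bits reproduces the original value.
    static bool fitsSExt32(int64_t V) { return V == (int32_t)V; }
    static bool fitsSExt8(int64_t V)  { return V == (int8_t)V; }
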
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
//
// Define X86 specific addressing mode.
-def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
@@ -353,6 +398,8 @@
// no AVX version of the desired instructions is present, this is better for
// incremental dev (without fallbacks it's easier to spot what's missing)
def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
+def Has3DNow : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
@@ -414,46 +461,28 @@
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def immSext8 : PatLeaf<(imm), [{
- return N->getSExtValue() == (int8_t)N->getSExtValue();
-}]>;
+def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
def i16immSExt8 : PatLeaf<(i16 immSext8)>;
def i32immSExt8 : PatLeaf<(i32 immSext8)>;
-
-/// Load patterns: these constrain the match to the right address space.
-def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
+def i64immSExt8 : PatLeaf<(i64 immSext8)>;
+def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero-extended (unsigned) field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
}]>;
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 256;
- return false;
-}]>;
-
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 257;
- return false;
+def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
+ uint64_t v = N->getZExtValue();
+ return v == (uint32_t)v && (int32_t)v == (int8_t)v;
}]>;
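
Likewise, the two zero-extension predicates just added reduce to plain integer comparisons; a hedged standalone equivalent (again, names are illustrative only):

    #include <cstdint>

    // Mirrors i64immZExt32: true iff the upper 32 bits of V are zero.
    static bool fitsZExt32(uint64_t V) { return V == (uint32_t)V; }

    // Mirrors i64immZExt32SExt8: V must fit in an unsigned 32-bit field and
    // its low 32 bits must equal a sign-extended 8-bit value.
    static bool fitsZExt32SExt8(uint64_t V) {
      return V == (uint32_t)V && (int32_t)V == (int8_t)V;
    }
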
-
// Helper fragments for loads.
// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
@@ -464,10 +493,6 @@
def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::EXTLOAD)
return LD->getAlignment() >= 2 && !LD->isVolatile();
@@ -476,10 +501,6 @@
def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
@@ -488,15 +509,18 @@
return false;
}]>;
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
@@ -504,6 +528,10 @@
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
@@ -511,6 +539,10 @@
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
// An 'and' node with a single use.
@@ -526,65 +558,10 @@
return N->hasOneUse();
}]>;
-// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
- APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
- return (~KnownZero0 & ~KnownZero1) == 0;
-}]>;
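
For intuition on the or_is_add fragment being removed here: when the set bits of the two operands are provably disjoint, OR produces no carries and is bit-for-bit identical to ADD. A tiny self-contained demonstration (example mine, not from this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t y = 0; y < 1024; ++y) {
        uint32_t x = y << 2;           // low two bits known to be zero
        assert((x | 3u) == (x + 3u));  // 'or 3' behaves exactly like 'add 3'
      }
      return 0;
    }
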
-
//===----------------------------------------------------------------------===//
-// Instruction list...
+// Instruction list.
//
-// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [ESP, EFLAGS], Uses = [ESP] in {
-def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(X86callseq_start timm:$amt)]>,
- Requires<[In32BitMode]>;
-def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[In32BitMode]>;
-}
-
-// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
-def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
- (outs),
- (ins GR8:$al,
- i64imm:$regsavefi, i64imm:$offset,
- variable_ops),
- "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
- [(X86vastart_save_xmm_regs GR8:$al,
- imm:$regsavefi,
- imm:$offset)]>;
-
-// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls
-// to _alloca are needed to probe the stack when allocating more than 4k bytes in
-// one go. Touching the stack at 4K increments is necessary to ensure that the
-// guard pages used by the OS virtual memory manager are allocated in correct
-// sequence.
-// The main point of having a separate instruction is the extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
- "# dynamic stack allocation",
- [(X86MingwAlloca)]>;
-}
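
The probing rationale in the comment above can be made concrete: a single stack adjustment larger than a page may jump past the OS guard page, so the allocation is touched one 4K page at a time. A rough sketch of the idea (illustrative only, not the actual _alloca implementation):

    #include <cstddef>

    // Touch each 4096-byte page of a large allocation in address order so
    // the OS can grow the stack via its guard page, one page at a time.
    static void probePages(volatile char *Base, std::size_t Bytes) {
      for (std::size_t Off = 0; Off < Bytes; Off += 4096)
        Base[Off] = 0;
    }
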
-
// Nop
let neverHasSideEffects = 1 in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
@@ -594,202 +571,22 @@
"nop{l}\t$zero", []>, TB;
}
-// Trap
-def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
-// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
-
-// PIC base construction. This expands to code that looks like this:
-// call $next_inst
-// popl %destreg"
-let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
- def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
- "", []>;
-
-//===----------------------------------------------------------------------===//
-// Control Flow Instructions.
-//
-
-// Return instructions.
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP in {
- def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
- "ret",
- [(X86retflag 0)]>;
- def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
- "ret\t$amt",
- [(X86retflag timm:$amt)]>;
- def LRET : I <0xCB, RawFrm, (outs), (ins),
- "lret", []>;
- def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lret\t$amt", []>;
-}
-
-// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
- def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp\t$dst", [(br bb:$dst)]>;
- def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst", []>;
-}
-
-// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
- multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
- def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
- def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
- }
-}
-
-defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
-defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
-defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
-defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
-defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
-defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
-defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
-defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
-defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
-defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
-defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
-defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
-defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
-defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
-defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
-
-// FIXME: What about the CX/RCX versions of this instruction?
-let Uses = [ECX], isBranch = 1, isTerminator = 1 in
- def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", []>;
-
-
-// Indirect branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
- def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
- def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
-
- def FARJMP16i : Iseg16<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "ljmp{w}\t$seg, $off", []>, OpSize;
- def FARJMP32i : Iseg32<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "ljmp{l}\t$seg, $off", []>;
-
- def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", []>, OpSize;
- def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", []>;
-}
-
-
-// Loop instructions
-
-def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
-
-//===----------------------------------------------------------------------===//
-// Call Instructions...
-//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. ESP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
- def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i32imm_pcrel:$dst,variable_ops),
- "call\t$dst", []>;
- def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
- "call\t{*}$dst", [(X86call GR32:$dst)]>;
- def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
- "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
-
- def FARCALL16i : Iseg16<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "lcall{w}\t$seg, $off", []>, OpSize;
- def FARCALL32i : Iseg32<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "lcall{l}\t$seg, $off", []>;
-
- def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", []>, OpSize;
- def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", []>;
-
- // callw for 16 bit code for the assembler.
- let isAsmParserOnly = 1 in
- def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
- (outs), (ins i16imm_pcrel:$dst, variable_ops),
- "callw\t$dst", []>, OpSize;
- }
// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
-def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", []>;
-
-// Tail call stuff.
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
- def TCRETURNdi : I<0, Pseudo, (outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- def TCRETURNri : I<0, Pseudo, (outs),
- (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- let mayLoad = 1 in
- def TCRETURNmi : I<0, Pseudo, (outs),
- (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
-
-  // FIXME: These should be pseudo instructions that are lowered when going to
- // mcinst.
- def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL",
- []>;
- def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
- "", []>; // FIXME: Remove encoding when JIT is dead.
- let mayLoad = 1 in
- def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
- "jmp{l}\t{*}$dst # TAILCALL", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions...
-//
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", []>, Requires<[In32BitMode]>;
-def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-let mayLoad = 1 in
-def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
@@ -802,6 +599,10 @@
OpSize;
def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let mayStore = 1 in {
@@ -814,28 +615,53 @@
OpSize;
def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
-}
-}
-let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
"push{l}\t$imm", []>;
-def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{w}\t$imm", []>, OpSize;
-def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", []>;
-}
-let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
- Requires<[In32BitMode]>;
-}
-let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
+
+
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
mayLoad=1, neverHasSideEffects=1 in {
@@ -848,12 +674,16 @@
Requires<[In32BitMode]>;
}
-let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32
- def BSWAP32r : I<0xC8, AddRegFrm,
- (outs GR32:$dst), (ins GR32:$src),
- "bswap{l}\t$dst",
- [(set GR32:$dst, (bswap GR32:$src))]>, TB;
-
+let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
// Bit scan instructions.
let Defs = [EFLAGS] in {
@@ -870,6 +700,12 @@
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
@@ -884,44 +720,23 @@
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
-let neverHasSideEffects = 1 in
-def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins i32mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
-let isReMaterializable = 1 in
-def LEA32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins i32mem:$src),
- "lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
-
-let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP;
-}
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
}
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP;
-
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
@@ -929,91 +744,24 @@
def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
-
-let Defs = [RAX, RDX] in
-def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
- TB;
-
-let Defs = [RAX, RCX, RDX] in
-def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
-def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
-}
-
-def SYSCALL : I<0x05, RawFrm,
- (outs), (ins), "syscall", []>, TB;
-def SYSRET : I<0x07, RawFrm,
- (outs), (ins), "sysret", []>, TB;
-def SYSENTER : I<0x34, RawFrm,
- (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT : I<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
-
-def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
//===----------------------------------------------------------------------===//
-// Input/Output Instructions...
+// Move Instructions.
//
-let Defs = [AL], Uses = [DX] in
-def IN8rr : I<0xEC, RawFrm, (outs), (ins),
- "in{b}\t{%dx, %al|%AL, %DX}", []>;
-let Defs = [AX], Uses = [DX] in
-def IN16rr : I<0xED, RawFrm, (outs), (ins),
- "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
-let Defs = [EAX], Uses = [DX] in
-def IN32rr : I<0xED, RawFrm, (outs), (ins),
- "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
-
-let Defs = [AL] in
-def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
- "in{b}\t{$port, %al|%AL, $port}", []>;
-let Defs = [AX] in
-def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
- "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
-let Defs = [EAX] in
-def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
- "in{l}\t{$port, %eax|%EAX, $port}", []>;
-
-let Uses = [DX, AL] in
-def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
- "out{b}\t{%al, %dx|%DX, %AL}", []>;
-let Uses = [DX, AX] in
-def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
- "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
-let Uses = [DX, EAX] in
-def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
- "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
-
-let Uses = [AL] in
-def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
- "out{b}\t{%al, $port|$port, %AL}", []>;
-let Uses = [AX] in
-def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
- "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
-let Uses = [EAX] in
-def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
- "out{l}\t{%eax, $port|$port, %EAX}", []>;
-
-def IN8 : I<0x6C, RawFrm, (outs), (ins),
- "ins{b}", []>;
-def IN16 : I<0x6D, RawFrm, (outs), (ins),
- "ins{w}", []>, OpSize;
-def IN32 : I<0x6D, RawFrm, (outs), (ins),
- "ins{l}", []>;
-//===----------------------------------------------------------------------===//
-// Move Instructions...
-//
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", []>;
@@ -1021,6 +769,8 @@
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
@@ -1032,6 +782,12 @@
def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
}
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
@@ -1043,6 +799,9 @@
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
@@ -1064,24 +823,22 @@
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, %eax}", []>,
Requires<[In32BitMode]>;
-
-// Moves to and from segment registers
-def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
+
let isCodeGenOnly = 1 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
@@ -1090,6 +847,8 @@
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1102,6 +861,9 @@
def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
}
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
@@ -1113,24 +875,9 @@
def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store GR32:$src, addr:$dst)]>;
-
-/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
- canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- []>;
-
-let mayStore = 1 in
-def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- []>;
-}
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1151,2219 +898,6 @@
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
}
-// Moves to and from debug registers
-def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-//===----------------------------------------------------------------------===//
-// Fixed-Register Multiplication and Division Instructions...
-//
-
-// Extra precision multiplication
-
-// AL is really implied by AX, but the registers in Defs must match the
-// SDNode results (i8, i32).
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>; // AL,AH = AL*GR8
-
-let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
- "mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*GR16
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
-def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src",
- []>; // EAX,EDX = EAX*GR32
-
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
- "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*[mem16]
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src",
- []>; // EAX,EDX = EAX*[mem32]
-}
-
-let neverHasSideEffects = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
- // AL,AH = AL*GR8
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize; // AX,DX = AX*GR16
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
- // EAX,EDX = EAX*GR32
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
-}
-} // neverHasSideEffects
-
-// unsigned division/remainder
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "div{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
- // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>;
-}
-
-// Signed division/remainder.
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "idiv{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- // EDX:EAX/[mem32] = EAX,EDX
- "idiv{l}\t$src", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Two address Instructions.
-//
-let Constraints = "$src1 = $dst" in {
-
-// Conditional moves
-let Uses = [EFLAGS] in {
-
-let Predicates = [HasCMov] in {
-let isCommutable = 1 in {
-def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_B, EFLAGS))]>,
- TB, OpSize;
-def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_B, EFLAGS))]>,
- TB;
-def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovae{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_AE, EFLAGS))]>,
- TB, OpSize;
-def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovae{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_AE, EFLAGS))]>,
- TB;
-def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmove{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_E, EFLAGS))]>,
- TB, OpSize;
-def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmove{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_E, EFLAGS))]>,
- TB;
-def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovne{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NE, EFLAGS))]>,
- TB, OpSize;
-def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovne{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NE, EFLAGS))]>,
- TB;
-def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_BE, EFLAGS))]>,
- TB, OpSize;
-def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_BE, EFLAGS))]>,
- TB;
-def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmova{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_A, EFLAGS))]>,
- TB, OpSize;
-def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmova{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_A, EFLAGS))]>,
- TB;
-def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_L, EFLAGS))]>,
- TB, OpSize;
-def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_L, EFLAGS))]>,
- TB;
-def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovge{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_GE, EFLAGS))]>,
- TB, OpSize;
-def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovge{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_GE, EFLAGS))]>,
- TB;
-def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovle{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_LE, EFLAGS))]>,
- TB, OpSize;
-def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovle{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_LE, EFLAGS))]>,
- TB;
-def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovg{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_G, EFLAGS))]>,
- TB, OpSize;
-def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovg{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_G, EFLAGS))]>,
- TB;
-def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovs{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_S, EFLAGS))]>,
- TB, OpSize;
-def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovs{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_S, EFLAGS))]>,
- TB;
-def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovns{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NS, EFLAGS))]>,
- TB, OpSize;
-def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovns{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NS, EFLAGS))]>,
- TB;
-def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_P, EFLAGS))]>,
- TB, OpSize;
-def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_P, EFLAGS))]>,
- TB;
-def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NP, EFLAGS))]>,
- TB, OpSize;
-def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NP, EFLAGS))]>,
- TB;
-def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovo{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_O, EFLAGS))]>,
- TB, OpSize;
-def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovo{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_O, EFLAGS))]>,
- TB;
-def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovno{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NO, EFLAGS))]>,
- TB, OpSize;
-def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovno{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NO, EFLAGS))]>,
- TB;
-} // isCommutable = 1
-
-def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_B, EFLAGS))]>,
- TB, OpSize;
-def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_B, EFLAGS))]>,
- TB;
-def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovae{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_AE, EFLAGS))]>,
- TB, OpSize;
-def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovae{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_AE, EFLAGS))]>,
- TB;
-def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmove{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_E, EFLAGS))]>,
- TB, OpSize;
-def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmove{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_E, EFLAGS))]>,
- TB;
-def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovne{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NE, EFLAGS))]>,
- TB, OpSize;
-def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovne{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NE, EFLAGS))]>,
- TB;
-def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_BE, EFLAGS))]>,
- TB, OpSize;
-def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_BE, EFLAGS))]>,
- TB;
-def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmova{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_A, EFLAGS))]>,
- TB, OpSize;
-def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmova{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_A, EFLAGS))]>,
- TB;
-def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_L, EFLAGS))]>,
- TB, OpSize;
-def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_L, EFLAGS))]>,
- TB;
-def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovge{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_GE, EFLAGS))]>,
- TB, OpSize;
-def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovge{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_GE, EFLAGS))]>,
- TB;
-def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovle{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_LE, EFLAGS))]>,
- TB, OpSize;
-def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovle{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_LE, EFLAGS))]>,
- TB;
-def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovg{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_G, EFLAGS))]>,
- TB, OpSize;
-def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovg{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_G, EFLAGS))]>,
- TB;
-def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovs{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_S, EFLAGS))]>,
- TB, OpSize;
-def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovs{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_S, EFLAGS))]>,
- TB;
-def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovns{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NS, EFLAGS))]>,
- TB, OpSize;
-def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovns{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NS, EFLAGS))]>,
- TB;
-def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_P, EFLAGS))]>,
- TB, OpSize;
-def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_P, EFLAGS))]>,
- TB;
-def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NP, EFLAGS))]>,
- TB, OpSize;
-def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NP, EFLAGS))]>,
- TB;
-def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovo{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_O, EFLAGS))]>,
- TB, OpSize;
-def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovo{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_O, EFLAGS))]>,
- TB;
-def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovno{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NO, EFLAGS))]>,
- TB, OpSize;
-def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovno{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NO, EFLAGS))]>,
- TB;
-} // Predicates = [HasCMov]
-
-// X86 doesn't have 8-bit conditional moves. Use a customInserter to
-// emit control flow. An alternative to this is to mark i8 SELECT as Promote;
-// however, that requires promoting the operands and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
-def CMOV_GR8 : I<0, Pseudo,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
- "#CMOV_GR8 PSEUDO!",
- [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
- imm:$cond, EFLAGS))]>;
-
-let Predicates = [NoCMov] in {
-def CMOV_GR32 : I<0, Pseudo,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
- "#CMOV_GR32* PSEUDO!",
- [(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_GR16 : I<0, Pseudo,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
- "#CMOV_GR16* PSEUDO!",
- [(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_RFP32 : I<0, Pseudo,
- (outs RFP32:$dst),
- (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
- "#CMOV_RFP32 PSEUDO!",
- [(set RFP32:$dst,
- (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
- EFLAGS))]>;
-def CMOV_RFP64 : I<0, Pseudo,
- (outs RFP64:$dst),
- (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
- "#CMOV_RFP64 PSEUDO!",
- [(set RFP64:$dst,
- (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
- EFLAGS))]>;
-def CMOV_RFP80 : I<0, Pseudo,
- (outs RFP80:$dst),
- (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
- "#CMOV_RFP80 PSEUDO!",
- [(set RFP80:$dst,
- (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
- EFLAGS))]>;
-} // Predicates = [NoCMov]
-} // usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
-} // Uses = [EFLAGS]
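
The pseudo-instructions above are expanded by the target's custom inserter
into an explicit branch diamond, since there is no 8-bit cmov encoding. A
minimal C++ sketch of the semantics the expansion must preserve follows; the
function name and register choices are illustrative, not the backend's code:

    #include <cstdint>

    // What CMOV_GR8 must compute: an 8-bit select on a prior EFLAGS test.
    // The custom inserter emits roughly the branch diamond sketched in the
    // comments instead of a (nonexistent) 8-bit cmov:
    //       jcc  .Ltrue        ; branch on the tested condition
    //       movb %f, %al       ; false edge
    //       jmp  .Ldone
    //   .Ltrue:
    //       movb %t, %al       ; true edge
    //   .Ldone:
    uint8_t select8(bool cond, uint8_t t, uint8_t f) {
      return cond ? t : f;
    }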
-
-
-// Unary instructions
-let CodeSize = 2 in {
-let Defs = [EFLAGS] in {
-def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
- "neg{b}\t$dst",
- [(set GR8:$dst, (ineg GR8:$src1)),
- (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "neg{w}\t$dst",
- [(set GR16:$dst, (ineg GR16:$src1)),
- (implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "neg{l}\t$dst",
- [(set GR32:$dst, (ineg GR32:$src1)),
- (implicit EFLAGS)]>;
-
-let Constraints = "" in {
- def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
- "neg{b}\t$dst",
- [(store (ineg (loadi8 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
- def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
- "neg{w}\t$dst",
- [(store (ineg (loadi16 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
- "neg{l}\t$dst",
- [(store (ineg (loadi32 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Match "xor -1" to "not". These are favored over a move-imm + xor
-// pair to save code size (see the example after this block).
-let AddedComplexity = 15 in {
-def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
- "not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src1))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src1))]>;
-}
-let Constraints = "" in {
- def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
- "not{b}\t$dst",
- [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
- def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
- "not{w}\t$dst",
- [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
- def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
- "not{l}\t$dst",
- [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-} // Constraints = ""
-} // CodeSize
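
Both source-level spellings below hit the AddedComplexity patterns above, and
neither materializes the -1 immediate. A tiny illustrative sketch (function
names hypothetical):

    // x ^ -1 and ~x are the same DAG node, so both select NOT32r and
    // avoid a separate 'movl $-1, %reg' plus XOR32rr.
    int complement_xor(int x) { return x ^ -1; }
    int complement_not(int x) { return ~x; }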
-
-// TODO: inc/dec are slow on the P4 but fast on the Pentium-M.
-let Defs = [EFLAGS] in {
-let CodeSize = 2 in
-def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
-
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can transform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
- OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
- Requires<[In32BitMode]>;
-}
-let Constraints = "", CodeSize = 2 in {
- def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
- [(store (add (loadi8 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
- def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In32BitMode]>;
- def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-
-let CodeSize = 2 in
-def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can transform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
- OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
- Requires<[In32BitMode]>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 1
-
-let Constraints = "", CodeSize = 2 in {
- def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
- [(store (add (loadi8 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
- def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In32BitMode]>;
- def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-} // Defs = [EFLAGS]
-
-// Logical operators...
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
-def AND8rr : I<0x20, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>;
-def AND16rr : I<0x21, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
-def AND32rr : I<0x21, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- GR32:$src2))]>;
-}
-
-// AND instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}", []>;
-def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
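
To make the preceding comment concrete: the same register-register AND has
two legal encodings, and the decoder must accept both; that is all the _REV
defs provide. The byte sequences below are hand-computed from the ModRM rules
and are illustrative only:

    // Two encodings that disassemble to the identical instruction:
    //   0x20 /r (AND r/m8, r8) -> 20 D8   and %bl, %al
    //   0x22 /r (AND r8, r/m8) -> 22 C3   and %bl, %al
    const unsigned char kAndDestForm[] = {0x20, 0xD8};
    const unsigned char kAndSrcForm[]  = {0x22, 0xC3};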
-
-def AND8rm : I<0x22, MRMSrcMem,
- (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
- (loadi8 addr:$src2)))]>;
-def AND16rm : I<0x23, MRMSrcMem,
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- (loadi16 addr:$src2)))]>,
- OpSize;
-def AND32rm : I<0x23, MRMSrcMem,
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- (loadi32 addr:$src2)))]>;
-
-def AND8ri : Ii8<0x80, MRM4r,
- (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
- imm:$src2))]>;
-def AND16ri : Ii16<0x81, MRM4r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def AND32ri : Ii32<0x81, MRM4r,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- imm:$src2))]>;
-def AND16ri8 : Ii8<0x83, MRM4r,
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- i16immSExt8:$src2))]>,
- OpSize;
-def AND32ri8 : Ii8<0x83, MRM4r,
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def AND8mr : I<0x20, MRMDestMem,
- (outs), (ins i8mem :$dst, GR8 :$src),
- "and{b}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mr : I<0x21, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mr : I<0x21, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND8mi : Ii8<0x80, MRM4m,
- (outs), (ins i8mem :$dst, i8imm :$src),
- "and{b}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mi : Ii16<0x81, MRM4m,
- (outs), (ins i16mem:$dst, i16imm:$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mi : Ii32<0x81, MRM4m,
- (outs), (ins i32mem:$dst, i32imm:$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mi8 : Ii8<0x83, MRM4m,
- (outs), (ins i16mem:$dst, i16i8imm :$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mi8 : Ii8<0x83, MRM4m,
- (outs), (ins i32mem:$dst, i32i8imm :$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src),
- "and{b}\t{$src, %al|%al, $src}", []>;
- def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src),
- "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
- "and{l}\t{$src, %eax|%eax, $src}", []>;
-
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
-def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst),
- (ins GR8 :$src1, GR8 :$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>;
-def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>,
- OpSize;
-def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>;
-}
-
-// OR instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}", []>;
-def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1,
- (load addr:$src2)))]>;
-def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- (load addr:$src2)))]>,
- OpSize;
-def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst),
- (ins GR8 :$src1, i8imm:$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>;
-def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- imm:$src2))]>;
-
-def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- i16immSExt8:$src2))]>, OpSize;
-def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-let Constraints = "" in {
- def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
- "or{b}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
- "or{b}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src),
- "or{b}\t{$src, %al|%al, $src}", []>;
- def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src),
- "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
- "or{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
- def XOR8rr : I<0x30, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
- GR8:$src2))]>;
- def XOR16rr : I<0x31, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
- def XOR32rr : I<0x31, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- GR32:$src2))]>;
-} // isCommutable = 1
-
-// XOR instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}", []>;
-def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def XOR8rm : I<0x32, MRMSrcMem,
- (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
- (load addr:$src2)))]>;
-def XOR16rm : I<0x33, MRMSrcMem,
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- (load addr:$src2)))]>,
- OpSize;
-def XOR32rm : I<0x33, MRMSrcMem,
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-def XOR8ri : Ii8<0x80, MRM6r,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>;
-def XOR16ri : Ii16<0x81, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def XOR32ri : Ii32<0x81, MRM6r,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- imm:$src2))]>;
-def XOR16ri8 : Ii8<0x83, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- i16immSExt8:$src2))]>,
- OpSize;
-def XOR32ri8 : Ii8<0x83, MRM6r,
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def XOR8mr : I<0x30, MRMDestMem,
- (outs), (ins i8mem :$dst, GR8 :$src),
- "xor{b}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mr : I<0x31, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mr : I<0x31, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR8mi : Ii8<0x80, MRM6m,
- (outs), (ins i8mem :$dst, i8imm :$src),
- "xor{b}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mi : Ii16<0x81, MRM6m,
- (outs), (ins i16mem:$dst, i16imm:$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mi : Ii32<0x81, MRM6m,
- (outs), (ins i32mem:$dst, i32imm:$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mi8 : Ii8<0x83, MRM6m,
- (outs), (ins i16mem:$dst, i16i8imm :$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mi8 : Ii8<0x83, MRM6m,
- (outs), (ins i32mem:$dst, i32i8imm :$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
- "xor{b}\t{$src, %al|%al, $src}", []>;
- def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src),
- "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
- "xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Shift instructions
-let Defs = [EFLAGS] in {
-let Uses = [CL] in {
-def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
- "shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src1, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src1, CL))]>;
-} // Uses = [CL]
-
-def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "shl{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "shl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "shl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
-
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
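
Illustrating that NOTE (a hedged sketch; actual selection can vary by
subtarget): a shift-left by one is canonicalized to an add of the value to
itself, so the SHL*r1 defs below need no patterns of their own.

    // x << 1 becomes x + x and selects ADD32rr rather than a shl-by-1.
    unsigned timesTwo(unsigned x) { return x << 1; } // typically addl %eax, %eax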
-
-def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
- "shl{b}\t$dst", []>;
-def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t$dst", []>, OpSize;
-def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t$dst", []>;
-
-} // isConvertibleToThreeAddress = 1
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
- "shl{b}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
- "shl{w}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
- "shl{l}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t$dst",
- [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t$dst",
- [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t$dst",
- [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
- "shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src1, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
- "shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
- "shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src1, CL))]>;
-}
-
-def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "shr{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
-def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "shr{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "shr{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
- "shr{b}\t$dst",
- [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
-def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
- "shr{w}\t$dst",
- [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
-def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
- "shr{l}\t$dst",
- [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
- OpSize;
- def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
- "shr{b}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
- "shr{w}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
- "shr{l}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t$dst",
- [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t$dst",
- [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
- def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t$dst",
- [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
- "sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src1, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
- "sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
- "sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src1, CL))]>;
-}
-
-def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "sar{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
-def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "sar{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "sar{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
- "sar{b}\t$dst",
- [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
-def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
- "sar{w}\t$dst",
- [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
-def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
- "sar{l}\t$dst",
- [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
- "sar{b}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
- "sar{w}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
- "sar{l}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t$dst",
- [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t$dst",
- [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t$dst",
- [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-// Rotate instructions
-
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Constraints = "" in {
-def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{1, $dst|$dst, 1}", []>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{1, $dst|$dst, 1}", []>;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{1, $dst|$dst, 1}", []>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{1, $dst|$dst, 1}", []>;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = ""
-
-// FIXME: provide shorter instructions when imm8 == 1
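
The FIXME refers to the one-byte-shorter D0/D1 rotate-by-one encodings; the
imm8 forms spend an extra byte on the count even when it is 1. Hand-computed,
illustrative encodings:

    // rolb $1, %al two ways; the D0 form saves a byte:
    //   C0 /0 ib (ROL r/m8, imm8) -> C0 C0 01   (3 bytes)
    //   D0 /0    (ROL r/m8, 1)    -> D0 C0      (2 bytes)
    const unsigned char kRolImmForm[] = {0xC0, 0xC0, 0x01};
    const unsigned char kRolOneForm[] = {0xD0, 0xC0};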
-let Uses = [CL] in {
-def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
-}
-
-def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "rol{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
-def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "rol{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "rol{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "rol{b}\t$dst",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
-def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "rol{w}\t$dst",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
-def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "rol{l}\t$dst",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
- "rol{b}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
- "rol{w}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
- "rol{l}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Rotate by 1
- def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t$dst",
- [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t$dst",
- [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t$dst",
- [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
-}
-
-def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "ror{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
-def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "ror{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "ror{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "ror{b}\t$dst",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
-def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "ror{w}\t$dst",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
-def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "ror{l}\t$dst",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
- def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
- "ror{b}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
- "ror{w}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
- "ror{l}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Rotate by 1
- def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-
-// Double shift instructions (generalizations of rotate)
-let Uses = [CL] in {
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
- TB, OpSize;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
- TB, OpSize;
-}
-
-// These instructions commute with each other (see the sketch below).
-let isCommutable = 1 in {
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
- TB, OpSize;
-def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
- TB, OpSize;
-}
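
A reference model for what the X86shld/X86shrd nodes compute in the 32-bit
case, plus the identity behind the commute note above. This is a sketch
assuming a count in 1..31; it is not the backend's code, and the helper names
are hypothetical:

    #include <cassert>
    #include <cstdint>

    // SHLD: shift 'hi' left, filling vacated low bits from 'fill'.
    uint32_t shld32(uint32_t hi, uint32_t fill, unsigned n) {
      assert(n >= 1 && n <= 31);
      return (hi << n) | (fill >> (32 - n));
    }
    // SHRD: shift 'lo' right, filling vacated high bits from 'fill'.
    uint32_t shrd32(uint32_t lo, uint32_t fill, unsigned n) {
      assert(n >= 1 && n <= 31);
      return (lo >> n) | (fill << (32 - n));
    }
    // The commutation the isCommutable comment refers to:
    //   shld(a, b, n) == shrd(b, a, 32 - n)
    void checkCommute(uint32_t a, uint32_t b, unsigned n) {
      assert(shld32(a, b, n) == shrd32(b, a, 32 - n));
    }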
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
- def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
- }
- def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
- def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
-
- let Uses = [CL] in {
- def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
- def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
- }
- def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB, OpSize;
- def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB, OpSize;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-
-// Arithmetic.
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
-// Register-Register Addition
-def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
- (ins GR8 :$src1, GR8 :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Register Addition
-def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
-def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
- GR32:$src2))]>;
-} // end isConvertibleToThreeAddress
-} // end isCommutable
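
What isConvertibleToThreeAddress buys for the defs above, as a hedged
illustration: ADD32rr ties $dst to $src1, so when both sources must stay live
the backend may rewrite the add into the three-address LEA instead of
inserting a copy.

    // With 'a' and 'b' both live afterwards, the two-address form would
    // need 'movl %eax, %ecx; addl %ebx, %ecx'; the backend may instead
    // emit the three-address 'leal (%eax,%ebx), %ecx'.
    int sumKeepingSources(int a, int b) { return a + b; }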
-
-// These are alternate spellings for use by the disassembler. We mark them as
-// codegen-only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def ADD8rr_alt: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}", []>;
- def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
- def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Addition
-def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
- (load addr:$src2)))]>;
-def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
- (load addr:$src2)))]>, OpSize;
-def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-// Register-Integer Addition
-def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86add_flag GR8:$src1, imm:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Integer Addition
-def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
-def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86add_flag GR32:$src1, imm:$src2))]>;
-def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
-}
-
-let Constraints = "" in {
- // Memory-Register Addition
- def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i16immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i32immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
-  // Addition to rAX
- def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
- "add{b}\t{$src, %al|%al, $src}", []>;
- def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
- "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
- "add{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
-def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
-def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
-def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
-}
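
Why these live under Uses = [EFLAGS] and select from the adde node: the carry
flag produced by a preceding add is an implicit input. The canonical client
is multiword arithmetic; a minimal sketch for a 32-bit target:

    #include <cstdint>

    // On 32-bit x86 an i64 add legalizes to an addc/adde pair:
    //   addl %ecx, %eax   ; low halves, sets CF
    //   adcl %edx, %ebx   ; high halves, consumes CF (ADC32rr)
    uint64_t add64(uint64_t a, uint64_t b) { return a + b; }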
-
-let isCodeGenOnly = 1 in {
-def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}", []>;
-def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
- (ins GR8:$src1, i8mem:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
-def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
- OpSize;
-def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
-def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
-def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
-def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
-def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
- def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
- def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
- def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
- def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
- def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
- def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
- def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
- def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
- "adc{b}\t{$src, %al|%al, $src}", []>;
- def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
- "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
- "adc{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Uses = [EFLAGS]
-
-// Register-Register Subtraction
-def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, GR8:$src2))]>;
-def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
-def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, GR32:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
-def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
-def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
- (ins GR8:$src1, i8imm:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, imm:$src2))]>;
-def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
-def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, imm:$src2))]>;
-def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- // Memory-Register Subtraction
- def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-
- // Memory-Integer Subtraction
- def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i16immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i32immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
- def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
- "sub{b}\t{$src, %al|%al, $src}", []>;
- def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
- "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
- "sub{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
- (ins GR8:$src1, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
-def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
-def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-
-let Constraints = "" in {
- def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
- def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
- def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
- def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
- def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
- def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
- def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
- def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
- def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
- "sbb{b}\t{$src, %al|%al, $src}", []>;
- def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
- "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
- "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let isCodeGenOnly = 1 in {
-def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
-def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
- OpSize;
-def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
-def SBB8ri : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
-def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
-def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
-def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
-// Register-Register Signed Integer Multiply
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
-}
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
- TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
-} // Defs = [EFLAGS]
-} // end Two Address instructions
-
-// Surprisingly enough, these are not two-address instructions!
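// Illustrative example (editorial sketch, not part of this patch): the
// immediate forms below take separate source and destination operands, e.g.
//   imull $10, %esi, %edi    // %edi = %esi * 10; %esi is unchanged
// unlike the rr/rm forms above, which read and write $dst in place.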
-let Defs = [EFLAGS] in {
-// Register-Integer Signed Integer Multiply
-def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
-def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, imm:$src2))]>;
-def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-// Memory-Integer Signed Integer Multiply
-def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
- (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>,
- OpSize;
-def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
- (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>;
-def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
- (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
-def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
- (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Test instructions are just like AND, except they don't generate a result.
-//
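// Editorial sketch of the semantics (not from this patch): "test" performs
// the same AND as the instructions above but only updates EFLAGS, e.g.
//   testl %eax, %eax    // sets ZF/SF/PF from %eax & %eax; %eax unchanged
//   andl  %eax, %eax    // same flags, but also writes the result back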
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // TEST X, Y --> TEST Y, X
-def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>;
-def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2),
- 0))]>,
- OpSize;
-def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2),
- 0))]>;
-}
-
-def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
- "test{b}\t{$src, %al|%al, $src}", []>;
-def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
- "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
- "test{l}\t{$src, %eax|%eax, $src}", []>;
-
-def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)),
- 0))]>;
-def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR16:$src1,
- (loadi16 addr:$src2)), 0))]>, OpSize;
-def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR32:$src1,
- (loadi32 addr:$src2)), 0))]>;
-
-def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8
- (outs), (ins GR8:$src1, i8imm:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>;
-def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16
- (outs), (ins GR16:$src1, i16imm:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>,
- OpSize;
-def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32
- (outs), (ins GR32:$src1, i32imm:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>;
-
-def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8
- (outs), (ins i8mem:$src1, i8imm:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2),
- 0))]>;
-def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16
- (outs), (ins i16mem:$src1, i16imm:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2),
- 0))]>, OpSize;
-def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32
- (outs), (ins i32mem:$src1, i32imm:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2),
- 0))]>;
-} // Defs = [EFLAGS]
-
// Condition code ops, incl. set if equal/not equal/...
let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
@@ -3371,305 +905,10 @@
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
-let Uses = [EFLAGS] in {
-// Use sbb to materialize carry bit.
-let Defs = [EFLAGS], isCodeGenOnly = 1 in {
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
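// A hedged sketch of what these pseudos stand in for: materializing the carry
// into a 32-bit register is logically "sbb %reg, %reg", i.e. something like
//   def : Pat<(X86setcc_c X86_COND_B, EFLAGS), (SBB32rr $dst, $dst)>;
// (hypothetical, not valid TableGen), since Pat<> cannot name the result
// register as both of its own inputs; hence the explicit definitions below.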
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
- OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-} // isCodeGenOnly
-
-def SETEr : I<0x94, MRM0r,
- (outs GR8 :$dst), (ins),
- "sete\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
- TB; // GR8 = ==
-def SETEm : I<0x94, MRM0m,
- (outs), (ins i8mem:$dst),
- "sete\t$dst",
- [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = ==
-
-def SETNEr : I<0x95, MRM0r,
- (outs GR8 :$dst), (ins),
- "setne\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
- TB; // GR8 = !=
-def SETNEm : I<0x95, MRM0m,
- (outs), (ins i8mem:$dst),
- "setne\t$dst",
- [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = !=
-
-def SETLr : I<0x9C, MRM0r,
- (outs GR8 :$dst), (ins),
- "setl\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
- TB; // GR8 = < signed
-def SETLm : I<0x9C, MRM0m,
- (outs), (ins i8mem:$dst),
- "setl\t$dst",
- [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = < signed
-
-def SETGEr : I<0x9D, MRM0r,
- (outs GR8 :$dst), (ins),
- "setge\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
- TB; // GR8 = >= signed
-def SETGEm : I<0x9D, MRM0m,
- (outs), (ins i8mem:$dst),
- "setge\t$dst",
- [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = >= signed
-
-def SETLEr : I<0x9E, MRM0r,
- (outs GR8 :$dst), (ins),
- "setle\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
- TB; // GR8 = <= signed
-def SETLEm : I<0x9E, MRM0m,
- (outs), (ins i8mem:$dst),
- "setle\t$dst",
- [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <= signed
-
-def SETGr : I<0x9F, MRM0r,
- (outs GR8 :$dst), (ins),
- "setg\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
- TB; // GR8 = > signed
-def SETGm : I<0x9F, MRM0m,
- (outs), (ins i8mem:$dst),
- "setg\t$dst",
- [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = > signed
-
-def SETBr : I<0x92, MRM0r,
- (outs GR8 :$dst), (ins),
- "setb\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
- TB; // GR8 = < unsigned
-def SETBm : I<0x92, MRM0m,
- (outs), (ins i8mem:$dst),
- "setb\t$dst",
- [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = < unsigned
-
-def SETAEr : I<0x93, MRM0r,
- (outs GR8 :$dst), (ins),
- "setae\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
- TB; // GR8 = >= unsigned
-def SETAEm : I<0x93, MRM0m,
- (outs), (ins i8mem:$dst),
- "setae\t$dst",
- [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = >= unsigned
-
-def SETBEr : I<0x96, MRM0r,
- (outs GR8 :$dst), (ins),
- "setbe\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
- TB; // GR8 = <= unsigned
-def SETBEm : I<0x96, MRM0m,
- (outs), (ins i8mem:$dst),
- "setbe\t$dst",
- [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <= unsigned
-
-def SETAr : I<0x97, MRM0r,
- (outs GR8 :$dst), (ins),
- "seta\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
- TB; // GR8 = > unsigned
-def SETAm : I<0x97, MRM0m,
- (outs), (ins i8mem:$dst),
- "seta\t$dst",
- [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = > unsigned
-
-def SETSr : I<0x98, MRM0r,
- (outs GR8 :$dst), (ins),
- "sets\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
- TB; // GR8 = <sign bit>
-def SETSm : I<0x98, MRM0m,
- (outs), (ins i8mem:$dst),
- "sets\t$dst",
- [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <sign bit>
-def SETNSr : I<0x99, MRM0r,
- (outs GR8 :$dst), (ins),
- "setns\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
- TB; // GR8 = !<sign bit>
-def SETNSm : I<0x99, MRM0m,
- (outs), (ins i8mem:$dst),
- "setns\t$dst",
- [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = !<sign bit>
-
-def SETPr : I<0x9A, MRM0r,
- (outs GR8 :$dst), (ins),
- "setp\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
- TB; // GR8 = parity
-def SETPm : I<0x9A, MRM0m,
- (outs), (ins i8mem:$dst),
- "setp\t$dst",
- [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = parity
-def SETNPr : I<0x9B, MRM0r,
- (outs GR8 :$dst), (ins),
- "setnp\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
- TB; // GR8 = not parity
-def SETNPm : I<0x9B, MRM0m,
- (outs), (ins i8mem:$dst),
- "setnp\t$dst",
- [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = not parity
-
-def SETOr : I<0x90, MRM0r,
- (outs GR8 :$dst), (ins),
- "seto\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
- TB; // GR8 = overflow
-def SETOm : I<0x90, MRM0m,
- (outs), (ins i8mem:$dst),
- "seto\t$dst",
- [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = overflow
-def SETNOr : I<0x91, MRM0r,
- (outs GR8 :$dst), (ins),
- "setno\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
- TB; // GR8 = not overflow
-def SETNOm : I<0x91, MRM0m,
- (outs), (ins i8mem:$dst),
- "setno\t$dst",
- [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = not overflow
-} // Uses = [EFLAGS]
-
-
-// Integer comparisons
-let Defs = [EFLAGS] in {
-def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
- "cmp{b}\t{$src, %al|%al, $src}", []>;
-def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
- "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
- "cmp{l}\t{$src, %eax|%eax, $src}", []>;
-
-def CMP8rr : I<0x38, MRMDestReg,
- (outs), (ins GR8 :$src1, GR8 :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>;
-def CMP16rr : I<0x39, MRMDestReg,
- (outs), (ins GR16:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize;
-def CMP32rr : I<0x39, MRMDestReg,
- (outs), (ins GR32:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>;
-def CMP8mr : I<0x38, MRMDestMem,
- (outs), (ins i8mem :$src1, GR8 :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>;
-def CMP16mr : I<0x39, MRMDestMem,
- (outs), (ins i16mem:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>,
- OpSize;
-def CMP32mr : I<0x39, MRMDestMem,
- (outs), (ins i32mem:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>;
-def CMP8rm : I<0x3A, MRMSrcMem,
- (outs), (ins GR8 :$src1, i8mem :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>;
-def CMP16rm : I<0x3B, MRMSrcMem,
- (outs), (ins GR16:$src1, i16mem:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>,
- OpSize;
-def CMP32rm : I<0x3B, MRMSrcMem,
- (outs), (ins GR32:$src1, i32mem:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>;
-
-// These are alternate spellings for use by the disassembler; we mark them as
-// code-gen-only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def CMP8rr_alt : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
- def CMP16rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
- def CMP32rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
-}
-def CMP8ri : Ii8<0x80, MRM7r,
- (outs), (ins GR8:$src1, i8imm:$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>;
-def CMP16ri : Ii16<0x81, MRM7r,
- (outs), (ins GR16:$src1, i16imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize;
-def CMP32ri : Ii32<0x81, MRM7r,
- (outs), (ins GR32:$src1, i32imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>;
-def CMP8mi : Ii8 <0x80, MRM7m,
- (outs), (ins i8mem :$src1, i8imm :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>;
-def CMP16mi : Ii16<0x81, MRM7m,
- (outs), (ins i16mem:$src1, i16imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>,
- OpSize;
-def CMP32mi : Ii32<0x81, MRM7m,
- (outs), (ins i32mem:$src1, i32imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>;
-def CMP16ri8 : Ii8<0x83, MRM7r,
- (outs), (ins GR16:$src1, i16i8imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def CMP16mi8 : Ii8<0x83, MRM7m,
- (outs), (ins i16mem:$src1, i16i8imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
-def CMP32mi8 : Ii8<0x83, MRM7m,
- (outs), (ins i32mem:$src1, i32i8imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1),
- i32immSExt8:$src2))]>;
-def CMP32ri8 : Ii8<0x83, MRM7r,
- (outs), (ins GR32:$src1, i32i8imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
+//===----------------------------------------------------------------------===//
+// Bit test instructions: BT, BTS, BTR, BTC.
-// Bit tests.
-// TODO: BTC, BTR, and BTS
let Defs = [EFLAGS] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3677,6 +916,9 @@
def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
@@ -3684,17 +926,23 @@
// only for now.
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi16 addr:$src1), GR16:$src2),
// (implicit EFLAGS)]
[]
>, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi32 addr:$src1), GR32:$src2),
// (implicit EFLAGS)]
[]
>, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB;
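// Editorial example of the asymmetry described above (not from this patch):
//   btl %ecx, %eax      // the bit index is taken modulo 32
//   btl %ecx, (%edi)    // all of %ecx selects a bit relative to (%edi)
// so the load form cannot be matched by simply truncating the index.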
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3703,6 +951,10 @@
def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
@@ -3714,307 +966,132 @@
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ i64immSExt8:$src2))]>, TB;
+
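// Editorial note (an assumption worth checking against the manuals): an
// immediate bit offset is interpreted modulo the operand width, so e.g.
// "btl $5, (%edi)" only touches the addressed dword; this is why the
// immediate-memory forms above need no FastBTMem guard.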
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // Defs = [EFLAGS]
-// Sign/Zero extenders
-// Use movsbl instead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movsbw included for the disassembler.
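// Editorial example: both forms sign-extend %al, but the 32-bit one is a byte
// shorter (no 0x66 prefix) and writes the full register:
//   movsbw %al, %bx     // partial write of %ebx; may cause a partial stall
//   movsbl %al, %ebx    // full write; the low 16 bits hold the same result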
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
- "movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))]>, TB;
-def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
- "movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
-def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
- "movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))]>, TB;
-def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-
-// Use movzbl instead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))]>, TB;
-def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
-def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
- "movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))]>, TB;
-def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
-
-// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
-// except that they use GR32_NOREX for the output operand register class
-// instead of GR32. This allows them to operate on h registers on x86-64.
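// Editorial note: any REX prefix makes %ah/%bh/%ch/%dh unencodable, e.g.
//   movzbl %ah, %r8d    // impossible: %r8d needs REX, and REX forbids %ah
// so restricting the destination to GR32_NOREX guarantees a REX-free encoding
// and keeps an h-register source legal on x86-64.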
-def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
- (outs GR32_NOREX:$dst), (ins GR8:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- []>, TB;
-let mayLoad = 1 in
-def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
- (outs GR32_NOREX:$dst), (ins i8mem:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- []>, TB;
-
-let neverHasSideEffects = 1 in {
- let Defs = [AX], Uses = [AL] in
- def CBW : I<0x98, RawFrm, (outs), (ins),
- "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
- let Defs = [EAX], Uses = [AX] in
- def CWDE : I<0x98, RawFrm, (outs), (ins),
- "{cwtl|cwde}", []>; // EAX = signext(AX)
-
- let Defs = [AX,DX], Uses = [AX] in
- def CWD : I<0x99, RawFrm, (outs), (ins),
- "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
- let Defs = [EAX,EDX], Uses = [EAX] in
- def CDQ : I<0x99, RawFrm, (outs), (ins),
- "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map movr0 to xor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: Set encoding to pseudo.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isCodeGenOnly = 1 in {
-def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)]>;
-
-// We want to rewrite MOV16r0 in terms of MOV32r0, because it has a smaller
-// encoding and sometimes avoids a partial-register update, but doing so
-// at isel time interferes with rematerialization in the current register
-// allocator. For now, this is rewritten when the instruction is lowered
-// to an MCInst.
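// Editorial sketch of the eventual lowering: MOV16r0 is emitted as if it were
// MOV32r0, i.e.
//   xorl %eax, %eax     // a byte shorter than "xorw %ax, %ax" and a full
//                       // register write; the low 16 bits are still zero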
-def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
- "",
- [(set GR16:$dst, 0)]>, OpSize;
-
-// FIXME: Set encoding to pseudo.
-def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//
-
-// ELF TLS Support
-// All calls clobber the non-callee-saved registers. ESP is marked as
-// a use so that stack-pointer assignments that appear immediately
-// before calls are not considered dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "leal\t$sym, %eax; "
- "call\t___tls_get_addr at PLT",
- [(X86tlsaddr tls32addr:$sym)]>,
- Requires<[In32BitMode]>;
-
-// Darwin TLS Support
-// For i386, the address of the thunk is passed on the stack, on return the
-// address of the variable is in %eax. %ecx is trashed during the function
-// call. All other registers are preserved.
-let Defs = [EAX, ECX],
- Uses = [ESP],
- usesCustomInserter = 1 in
-def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "# TLSCall_32",
- [(X86TLSCall addr:$sym)]>,
- Requires<[In32BitMode]>;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "movl\t%gs:$src, $dst",
- [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "movl\t%fs:$src, $dst",
- [(set GR32:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
- "ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)]>;
-
-}
//===----------------------------------------------------------------------===//
// Atomic support
//
-// Memory barriers
-let hasSideEffects = 1 in {
-def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
- "#MEMBARRIER",
- [(X86MemBarrier)]>, Requires<[HasSSE2]>;
-
-// TODO: Get this to fold the constant into the instruction.
-let Uses = [ESP] in
-def Int_MemBarrierNoSSE : I<0x0B, Pseudo, (outs), (ins GR32:$zero),
- "lock\n\t"
- "or{l}\t{$zero, (%esp)|(%esp), $zero}",
- [(X86MemBarrierNoSSE GR32:$zero)]>, LOCK;
-}
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
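// Editorial note: "xchg" with a memory operand asserts the bus lock
// implicitly, e.g.
//   xchgl %eax, (%edi)   // atomic even without an explicit "lock" prefix
// which is why these definitions carry no LOCK marker.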
let Constraints = "$val = $dst" in {
-def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- "xchg{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
-def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- "xchg{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
- OpSize;
def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
[(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val,i64mem:$ptr),
+ "xchg{q}\t{$val, $ptr|$ptr, $val}",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
-def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
- "xchg{l}\t{$val, $src|$src, $val}", []>;
-def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
- "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
"xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+ "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+ "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+ "xchg{q}\t{$val, $src|$src, $val}", []>;
}
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
"xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
"xchg{l}\t{$src, %eax|%eax, $src}", []>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|%rax, $src}", []>;
-// Atomic compare and swap.
-let Defs = [EAX, EFLAGS], Uses = [EAX] in {
-def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
- "lock\n\t"
- "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
-}
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
- "lock\n\t"
- "cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)]>, TB, LOCK;
-}
-let Defs = [AX, EFLAGS], Uses = [AX] in {
-def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
- "lock\n\t"
- "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
-}
-let Defs = [AL, EFLAGS], Uses = [AL] in {
-def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
- "lock\n\t"
- "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
-}
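// Editorial sketch of a typical use (not from this patch): lock cmpxchg
// compares %eax with memory and stores the new value only on a match, so an
// atomic increment loop looks roughly like
//   movl (%edi), %eax
// retry:
//   leal 1(%eax), %ecx          // compute the new value from the old
//   lock cmpxchgl %ecx, (%edi)
//   jne  retry                  // on failure %eax holds the current value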
-
-// Atomic exchange and add
-let Constraints = "$val = $dst", Defs = [EFLAGS] in {
-def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
- "lock\n\t"
- "xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
- TB, LOCK;
-def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
- "lock\n\t"
- "xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
- TB, OpSize, LOCK;
-def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "lock\n\t"
- "xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
- TB, LOCK;
-}
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -4022,6 +1099,8 @@
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
let mayLoad = 1, mayStore = 1 in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4030,6 +1109,9 @@
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
}
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
@@ -4038,6 +1120,8 @@
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4046,281 +1130,20 @@
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
"cmpxchg8b\t$dst", []>, TB;
-// Optimized codegen when the non-memory output is not used.
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
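// Editorial example: when the register result of an atomic add is unused,
//   lock addl %eax, (%edi)
// suffices and avoids "lock xaddl", which must also write the old memory
// value back into the source register.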
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "inc{b}\t$dst", []>, LOCK;
-def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "inc{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "inc{l}\t$dst", []>, LOCK;
-
-def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "dec{b}\t$dst", []>, LOCK;
-def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "dec{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "dec{l}\t$dst", []>, LOCK;
-}
-
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomInserter = 1 in {
-def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
-def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
-def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMXOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
-def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMNAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
-def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
- "#ATOMMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
-def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
-
-def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
-def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
-def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMXOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
-def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMNAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
-def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
- "#ATOMMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
-def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
-
-def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
-def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
-def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMXOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
-def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMNAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
-}
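// Editorial sketch (an assumption about the custom inserter, with placeholder
// operands): each pseudo above is expanded into a compare-and-swap loop,
// roughly
//   movl (%ptr), %eax
// retry:
//   movl %eax, %ecx ; andl %val, %ecx   // or/xor/nand/min/... as appropriate
//   lock cmpxchgl %ecx, (%ptr)
//   jne  retry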
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", []>, TB;
-let Constraints = "$val1 = $dst1, $val2 = $dst2",
- Defs = [EFLAGS, EAX, EBX, ECX, EDX],
- Uses = [EAX, EBX, ECX, EDX],
- mayLoad = 1, mayStore = 1,
- usesCustomInserter = 1 in {
-def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMAND6432 PSEUDO!", []>;
-def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMOR6432 PSEUDO!", []>;
-def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMXOR6432 PSEUDO!", []>;
-def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMNAND6432 PSEUDO!", []>;
-def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMADD6432 PSEUDO!", []>;
-def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSUB6432 PSEUDO!", []>;
-def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSWAP6432 PSEUDO!", []>;
-}
-// Segmentation support instructions.
-def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
-def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
- "ltr{w}\t{$src}", []>, TB;
-def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
- "ltr{w}\t{$src}", []>, TB;
-
-def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
- "push{w}\t%fs", []>, OpSize, TB;
-def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
- "push{l}\t%fs", []>, TB;
-def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
- "push{w}\t%gs", []>, OpSize, TB;
-def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
- "push{l}\t%gs", []>, TB;
-
-def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
- "pop{w}\t%fs", []>, OpSize, TB;
-def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
- "pop{l}\t%fs", []>, TB;
-def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
- "pop{w}\t%gs", []>, OpSize, TB;
-def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
- "pop{l}\t%gs", []>, TB;
-
-def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lds{l}\t{$src, $dst|$dst, $src}", []>;
-def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "les{l}\t{$src, $dst|$dst, $src}", []>;
-def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
- "verr\t$seg", []>, TB;
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
- "verr\t$seg", []>, TB;
-def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
- "verw\t$seg", []>, TB;
-def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
- "verw\t$seg", []>, TB;
-
-// Descriptor-table support instructions
-
-def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdt\t$dst", []>, TB;
-def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
- "sidt\t$dst", []>, TB;
-def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
- "sldt{w}\t$dst", []>, TB;
-def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
- "sldt{w}\t$dst", []>, TB;
-def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdt\t$src", []>, TB;
-def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidt\t$src", []>, TB;
-def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
- "lldt{w}\t$src", []>, TB;
-def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
- "lldt{w}\t$src", []>, TB;
-
// Lock instruction prefix
def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
@@ -4333,35 +1156,19 @@
def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
}
-// Segment override instruction prefixes
-def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
-def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
-def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
-def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
-def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
-def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// String manipulation instructions
-
def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
-// CPU flow control instructions
-
-def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
-def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
-
-// FPU control instructions
-
-def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
// Flag instructions
-
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
@@ -4373,620 +1180,77 @@
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
// Table lookup instructions
-
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
-// Specialized register support
-
-def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
-def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
-
-def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
- "smsw{w}\t$dst", []>, OpSize, TB;
-def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
- "smsw{l}\t$dst", []>, TB;
-// For memory operands, there is only a 16-bit form
-def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
- "smsw{w}\t$dst", []>, TB;
-
-def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
- "lmsw{w}\t$src", []>, TB;
-def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
- "lmsw{w}\t$src", []>, TB;
-
-def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
-
-// Cache instructions
-
-def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
-
-// VMX instructions
-
-// 66 0F 38 80
-def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
-// 66 0F 38 81
-def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
-// 0F 01 C1
-def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
-def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmclear\t$vmcs", []>, OpSize, TB;
-// 0F 01 C2
-def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
-// 0F 01 C3
-def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
-def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmptrld\t$vmcs", []>, TB;
-def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
- "vmptrst\t$vmcs", []>, TB;
-def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-// 0F 01 C4
-def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
-def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
- "vmxon\t{$vmxon}", []>, XS;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
-def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
-def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
-def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
-
-def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
- (ADD32ri GR32:$src1, tconstpool:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
- (ADD32ri GR32:$src1, tjumptable:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
- (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
- (ADD32ri GR32:$src1, texternalsym:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
- (ADD32ri GR32:$src1, tblockaddress:$src2)>;
-
-def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
- (MOV32mi addr:$dst, tglobaladdr:$src)>;
-def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
- (MOV32mi addr:$dst, texternalsym:$src)>;
-def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
- (MOV32mi addr:$dst, tblockaddress:$src)>;
-
-// Calls
-// tailcall stuff
-def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
- (TCRETURNri GR32_TC:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-// FIXME: This is disabled for 32-bit PIC mode because the global base
-// register, which is part of the address mode, may be assigned a
-// callee-saved register.
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
- (TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[In32BitMode, IsNotPIC]>;
-
-def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-// Normal calls, with various flavors of addresses.
-def : Pat<(X86call (i32 tglobaladdr:$dst)),
- (CALLpcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86call (i32 texternalsym:$dst)),
- (CALLpcrel32 texternalsym:$dst)>;
-def : Pat<(X86call (i32 imm:$dst)),
- (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR32:$src1, GR32:$src2),
- (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(addc GR32:$src1, (load addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(addc GR32:$src1, imm:$src2),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-def : Pat<(subc GR32:$src1, GR32:$src2),
- (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(subc GR32:$src1, (load addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(subc GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR8:$src1, 0),
- (TEST8rr GR8:$src1, GR8:$src1)>;
-def : Pat<(X86cmp GR16:$src1, 0),
- (TEST16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86cmp GR32:$src1, 0),
- (TEST32rr GR32:$src1, GR32:$src1)>;
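
For the zero test, TEST and CMP agree on the flags that matter, and the TEST
encoding drops the immediate byte. A minimal C++ sketch of the flag
equivalence (the function name is illustrative, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // ZF after "test r,r" (r & r) matches ZF after "cmp r,0" (r - 0),
    // so zero checks can use the shorter immediate-free TEST encoding.
    void zfMatches(uint32_t r) {
      bool zfTest = (r & r) == 0;
      bool zfCmp  = (r - 0u) == 0;
      assert(zfTest == zfCmp);
    }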
-
-// Conditional moves that fold a load: the operands are swapped and the
-// condition is inverted so the loaded value lands in the foldable operand.
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
- (CMOVAE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
- (CMOVAE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
- (CMOVB16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
- (CMOVB32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
- (CMOVNE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
- (CMOVNE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
- (CMOVE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
- (CMOVE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
- (CMOVA16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
- (CMOVA32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
- (CMOVBE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
- (CMOVBE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
- (CMOVGE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
- (CMOVGE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
- (CMOVL16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
- (CMOVL32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
- (CMOVG16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
- (CMOVG32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
- (CMOVLE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
- (CMOVLE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
- (CMOVNP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
- (CMOVNP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
- (CMOVP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
- (CMOVP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
- (CMOVNS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
- (CMOVNS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
- (CMOVS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
- (CMOVS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
- (CMOVNO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
- (CMOVNO32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
- (CMOVO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
- (CMOVO32rm GR32:$src2, addr:$src1)>;
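
CMOVcc can only fold the load into its source operand, so when the loaded
value appears on the other side of the select, these patterns commute the
operands and flip the condition code. Roughly, in C++ (an illustrative
sketch, not LLVM code):

    #include <cstdint>

    // below ? *p : r  is rewritten as  !below ? r : *p, putting the
    // memory operand where CMOVcc can fold it and inverting B to AE.
    uint32_t select(bool below, const uint32_t *p, uint32_t r) {
      return !below ? r : *p;   // same value, foldable form
    }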
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-
-// extload bool -> extload byte
-def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
-
-// Except for i16 -> i32, since isel expects i16 ops to be promoted to i32.
-def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
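
A sketch of why zero-extension is chosen here: anyext only promises to
preserve the low bits, and implementing it with movzx writes the full
destination register, so no dependence on the register's stale upper bits is
created (the function name is hypothetical):

    #include <cstdint>

    // anyext i8 -> i32: only the low 8 bits matter; zero-extending is a
    // valid choice and avoids a partial-register update on x86.
    uint32_t anyext8to32(uint8_t x) {
      return static_cast<uint32_t>(x);  // movzx: high 24 bits are don't-care
    }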
-
-
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
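
A rough C++ model of AAA's documented behavior, following the SDM pseudocode
(the Flags struct is a hypothetical stand-in for the EFLAGS bits involved):

    #include <cstdint>

    struct Flags { bool AF, CF; };  // stand-in for the two EFLAGS bits used

    void aaa(uint16_t &AX, Flags &F) {
      if ((AX & 0x0F) > 9 || F.AF) {
        AX += 0x106;          // AL += 6 with carry into AH, AH += 1
        F.AF = F.CF = true;
      } else {
        F.AF = F.CF = false;
      }
      AX &= 0xFF0F;           // AL keeps only its low BCD nibble
    }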
+
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", []>, Requires<[In32BitMode]>;
+
+// ASCII Adjust AL After Subtraction
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
+
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
+
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>,
+ Requires<[In32BitMode]>;
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR16:$src1, 128),
- (SUB16ri8 GR16:$src1, -128)>;
-def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
- (SUB16mi8 addr:$dst, -128)>;
-def : Pat<(add GR32:$src1, 128),
- (SUB32ri8 GR32:$src1, -128)>;
-def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
- (SUB32mi8 addr:$dst, -128)>;
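
The identity behind the trick, as a minimal C++ check: the SUB with a
sign-extended 8-bit immediate replaces an ADD that would need a full-width
one (the function name is illustrative):

    #include <cassert>
    #include <cstdint>

    // x + 128 == x - (-128) in two's complement; -128 fits in an imm8
    // field while +128 does not, so the SUB form encodes shorter.
    void addIsSub(uint32_t x) {
      assert(x + 128u == x - static_cast<uint32_t>(-128));
    }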
-
-// r & (2^16-1) ==> movz
-def : Pat<(and GR32:$src1, 0xffff),
- (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
- GR32_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
- GR16_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
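
These masks are exactly truncate-then-zero-extend, which maps to a single
movzx; the COPY_TO_REGCLASS to GR32_ABCD/GR16_ABCD is needed because only
EAX/EBX/ECX/EDX expose an 8-bit subregister in 32-bit mode. In C++ terms
(function names are illustrative):

    #include <cstdint>

    uint32_t low16(uint32_t x) { return static_cast<uint16_t>(x); } // x & 0xffff
    uint32_t low8 (uint32_t x) { return static_cast<uint8_t>(x);  } // x & 0xff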
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR32:$src, i16),
- (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-
-// trunc patterns
-def : Pat<(i16 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit)>,
- Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit)>,
- Requires<[In32BitMode]>;
-
-// h-register tricks
-def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)>,
- Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit_hi)>,
- Requires<[In32BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
- (EXTRACT_SUBREG
- (MOVZX32rr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_16bit)>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
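
The h-register tricks rely on bits 15..8 of EAX/EBX/ECX/EDX being directly
addressable as AH/BH/CH/DH: a shift-right-by-8 plus truncate or mask
collapses to one movzx from the high-byte subregister, which is why these
patterns also copy into the ABCD register classes. A sketch of the
recognized shape (hypothetical name):

    #include <cstdint>

    // (x >> 8) & 0xff: on x86 this can be a movzx from the AH-style
    // subregister instead of a shift followed by a mask.
    uint32_t highByteOfLowWord(uint32_t x) {
      return (x >> 8) & 0xff;
    }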
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
-def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
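
The two forms compute the same value for every input, and the ADD form
commutes and folds like any other add. A one-line check (name is
illustrative):

    #include <cassert>
    #include <cstdint>

    void shlOneIsAdd(uint32_t x) { assert((x << 1) == x + x); }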
-
-// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
- (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
- (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
- (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL32mCL addr:$dst)>;
-
-def : Pat<(srl GR8:$src1, (and CL, 31)),
- (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
- (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
- (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR32mCL addr:$dst)>;
-
-def : Pat<(sra GR8:$src1, (and CL, 31)),
- (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
- (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
- (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR32mCL addr:$dst)>;
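
These patterns fire because x86 shifts taking a CL count mask it to 5 bits
(count mod 32) for 8/16/32-bit operands, so an explicit AND with 31 in the
IR is already implied by the hardware. In C++ (illustrative name):

    #include <cstdint>

    // The & 31 is redundant on x86: SHL r32, CL already uses CL mod 32,
    // so the pattern selects a plain SHL and drops the AND.
    uint32_t shlMasked(uint32_t x, uint8_t count) {
      return x << (count & 31);
    }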
-
-// (anyext (setcc_carry)) -> (setcc_carry)
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
- (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
- (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR16:$src1, GR16:$src2),
- (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or_is_add GR32:$src1, GR32:$src2),
- (ADD32rr GR32:$src1, GR32:$src2)>;
-} // AddedComplexity
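
When the operands provably share no set bits there are no carries, so OR and
ADD produce the same result; selecting ADD lets the value participate in LEA
and other add-only folds. A minimal check of the identity (names are
illustrative):

    #include <cassert>
    #include <cstdint>

    uint32_t orAsAdd(uint32_t a, uint32_t b) {
      assert((a & b) == 0);   // precondition: disjoint bit sets
      uint32_t sum = a + b;
      assert(sum == (a | b)); // no carries, so OR and ADD agree
      return sum;
    }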
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+                 "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+                 "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
+// Subsystems.
//===----------------------------------------------------------------------===//
-// add reg, reg
-def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-
-// add reg, mem
-def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
- (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
- (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-
-// add reg, imm
-def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
-def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR16:$src1, i16immSExt8:$src2),
- (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(add GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// sub reg, reg
-def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-
-// sub reg, mem
-def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
- (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
- (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-
-// sub reg, imm
-def : Pat<(sub GR8:$src1, imm:$src2),
- (SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, imm:$src2),
- (SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
- (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// mul reg, reg
-def : Pat<(mul GR16:$src1, GR16:$src2),
- (IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(mul GR32:$src1, GR32:$src2),
- (IMUL32rr GR32:$src1, GR32:$src2)>;
-
-// mul reg, mem
-def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
- (IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
- (IMUL32rm GR32:$src1, addr:$src2)>;
-
-// mul reg, imm
-def : Pat<(mul GR16:$src1, imm:$src2),
- (IMUL16rri GR16:$src1, imm:$src2)>;
-def : Pat<(mul GR32:$src1, imm:$src2),
- (IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
- (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
- (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// reg = mul mem, imm
-def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
- (IMUL16rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
- (IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
- (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
- (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-
-// Optimize multiply by 2 with EFLAGS result.
-let AddedComplexity = 2 in {
-def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
-}
-
-// Patterns for nodes that do not produce flags, for instructions that do.
-
-// Increment reg.
-def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>;
-def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// Decrement reg.
-def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>;
-def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// or reg/reg.
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
-
-// or reg/mem
-def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
- (OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
- (OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
- (OR32rm GR32:$src1, addr:$src2)>;
-
-// or reg/imm
-def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, i16immSExt8:$src2),
- (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or GR32:$src1, i32immSExt8:$src2),
- (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// xor reg/reg
-def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
-
-// xor reg/mem
-def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
- (XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
- (XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
- (XOR32rm GR32:$src1, addr:$src2)>;
-
-// xor reg/imm
-def : Pat<(xor GR8:$src1, imm:$src2),
- (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, imm:$src2),
- (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(xor GR32:$src1, imm:$src2),
- (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
- (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
- (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// and reg/reg
-def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
-
-// and reg/mem
-def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
- (AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
- (AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
- (AND32rm GR32:$src1, addr:$src2)>;
-
-// and reg/imm
-def : Pat<(and GR8:$src1, imm:$src2),
- (AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, imm:$src2),
- (AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(and GR32:$src1, imm:$src2),
- (AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, i16immSExt8:$src2),
- (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(and GR32:$src1, i32immSExt8:$src2),
- (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// Floating Point Stack Support
-//===----------------------------------------------------------------------===//
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
+// X87 Floating Point Stack.
include "X86InstrFPStack.td"
-//===----------------------------------------------------------------------===//
-// X86-64 Support
-//===----------------------------------------------------------------------===//
-
-include "X86Instr64bit.td"
-
-//===----------------------------------------------------------------------===//
// SIMD support (SSE, MMX and AVX)
-//===----------------------------------------------------------------------===//
-
include "X86InstrFragmentsSIMD.td"
-//===----------------------------------------------------------------------===//
// FMA - Fused Multiply-Add support (requires FMA)
-//===----------------------------------------------------------------------===//
-
include "X86InstrFMA.td"
-//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
-//===----------------------------------------------------------------------===//
-
+// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
-//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
-//===----------------------------------------------------------------------===//
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
-include "X86InstrMMX.td"
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrMMX.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,9 @@
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -18,58 +21,23 @@
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
- // MMXI_binop_rm - Simple MMX binary operator.
- multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
- let isCommutable = Commutable;
- }
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
- (bitconvert
- (load_mmx addr:$src2)))))]>;
- }
-
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
let isCommutable = Commutable;
}
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>;
}
- // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
- //
- // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
- // to collapse (bitconvert VT to VT) into its operand.
- //
- multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
- let isCommutable = Commutable;
- }
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
- }
-
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2> {
@@ -89,14 +57,75 @@
}
}
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ let isCommutable = 0 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+ def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
//===----------------------------------------------------------------------===//
-// MMX EMMS & FEMMS Instructions
+// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
[(int_x86_mmx_emms)]>;
-def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
- [(int_x86_mmx_femms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -106,12 +135,12 @@
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (scalar_to_vector GR32:$src)))]>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+ (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
@@ -123,42 +152,41 @@
"movd\t{$src, $dst|$dst, $src}",
[]>;
-let neverHasSideEffects = 1 in
// These are 64-bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (bitconvert VR64:$src))]>;
def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v1i64 (scalar_to_vector GR64:$src)))]>;
-
+ (bitconvert GR64:$src))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (v1i64 VR64:$src), addr:$dst)]>;
+ [(store (x86mmx VR64:$src), addr:$dst)]>;
def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v1i64 (bitconvert
+ (x86mmx (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))))]>;
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (movl immAllZerosV,
- (v2i64 (scalar_to_vector
- (i64 (bitconvert (v1i64 VR64:$src)))))))]>;
+ (v2i64 (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
@@ -176,34 +204,40 @@
def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (X86vzmovl (v2i32
+ (x86mmx (X86vzmovl (x86mmx
(scalar_to_vector (loadi32 addr:$src))))))]>;
// Arithmetic Instructions
-
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
// -- Addition
-defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>;
-defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
-defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
-defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
-
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
// -- Subtraction
-defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
-defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
-defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
-defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
@@ -211,16 +245,25 @@
defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
// -- Multiplication
-defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw>;
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
@@ -232,23 +275,17 @@
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
-// Logical Instructions
-defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
-defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
-defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+ defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
-let Constraints = "$src1 = $dst" in {
- def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
- VR64:$src2)))]>;
- def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
- (load addr:$src2))))]>;
-}
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
@@ -270,12 +307,6 @@
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
-// Shift up / down and insert zeros.
-def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))),
- (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))),
- (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
@@ -285,84 +316,19 @@
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
-// Conversion Instructions
-
// -- Unpack Instructions
-let Constraints = "$src1 = $dst" in {
- // Unpack High Packed Data Instructions
- def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
-
- // Unpack Low Packed Data Instructions
- def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
-}
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+ int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+ int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+ int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+ int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+ int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+ int_x86_mmx_punpckldq>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
@@ -370,93 +336,80 @@
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
// -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
- (undef)))]>;
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))]>;
-// -- Conversion Instructions
-let neverHasSideEffects = 1 in {
-def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
- (ins f128mem:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst),
- (ins i64mem:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
- (ins i64mem:$src),
- "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
- (ins f128mem:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-} // end neverHasSideEffects
-// Extract / Insert
-def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
+ [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
(iPTR imm:$src2)))]>;
let Constraints = "$src1 = $dst" in {
- def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+ def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
- (ins VR64:$src1, GR32:$src2,i16i8imm:$src3),
+ (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- GR32:$src2,(iPTR imm:$src3))))]>;
- def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ GR32:$src2, (iPTR imm:$src3)))]>;
+
+ def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
- (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst,
- (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- (i32 (anyext (loadi16 addr:$src2))),
- (iPTR imm:$src3))))]>;
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3)))]>;
}
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_mmx_pmovmskb VR64:$src))]>;
+
// MMX to XMM for vector types
def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>;
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
(v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
@@ -464,14 +417,19 @@
def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
(v2i64 (MOVQI2PQIrm addr:$src))>;
-def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert
- (v2i32 (scalar_to_vector (loadi32 addr:$src))))))),
+def : Pat<(v2i64 (MMX_X86movq2dq
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(v2i64 (MOVDI2PDIrm addr:$src))>;
-// Mask creation
-def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+ (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+ (x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
let Uses = [EDI] in
@@ -483,181 +441,14 @@
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map zero vector to pxor.
-let isReMaterializable = 1, isCodeGenOnly = 1 in {
- // FIXME: Change encoding to pseudo.
- def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "",
- [(set VR64:$dst, (v2i32 immAllZerosV))]>;
- def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "",
- [(set VR64:$dst, (v2i32 immAllOnesV))]>;
-}
-
-let Predicates = [HasMMX] in {
- def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
- def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
- def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
-}
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// Store 64-bit integer vector values.
-def : Pat<(store (v8i8 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v4i16 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2i32 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v1i64 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-
-// Bit convert.
-def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
-
// 64-bit bit convert.
-def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 FR64:$src))),
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
-let AddedComplexity = 20 in {
- def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
-}
-// Clear top half.
-let AddedComplexity = 15 in {
- def : Pat<(v2i32 (X86vzmovl VR64:$src)),
- (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>;
-}
-
-// Patterns to perform canonical versions of vector shuffling.
-let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
-}
-
-let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
-}
-
-// Some special case PANDN patterns.
-// FIXME: Get rid of these.
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
- VR64:$src2)),
- (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
- (load addr:$src2))),
- (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-
-// Move MMX to lower 64-bit of XMM
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-// Move lower 64-bit of XMM to MMX.
-def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
-
-// Patterns for vector comparisons
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>;
-
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
-
-// CMOV* - Used to implement the SELECT DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
- def CMOV_V1I64 : I<0, Pseudo,
- (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
- "#CMOV_V1I64 PSEUDO!",
- [(set VR64:$dst,
- (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond,
- EFLAGS)))]>;
-}
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86InstrSSE.td Tue Oct 26 19:48:03 2010
@@ -15,43 +15,6 @@
//===----------------------------------------------------------------------===//
-// SSE scalar FP Instructions
-//===----------------------------------------------------------------------===//
-
-// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
- def CMOV_FR32 : I<0, Pseudo,
- (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
- "#CMOV_FR32 PSEUDO!",
- [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_FR64 : I<0, Pseudo,
- (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
- "#CMOV_FR64 PSEUDO!",
- [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_V4F32 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V4F32 PSEUDO!",
- [(set VR128:$dst,
- (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2F64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2F64 PSEUDO!",
- [(set VR128:$dst,
- (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2I64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2I64 PSEUDO!",
- [(set VR128:$dst,
- (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
-}
-
-//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
@@ -82,17 +45,15 @@
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+ SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, mem_cpat:$src2))]>;
}
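!cast<Intrinsic>(!strconcat(...)) is the replacement for the older !nameconcat operator: the strings are concatenated at TableGen time and the resulting name is looked up among the defined records, which lets one multiclass bind a different intrinsic per instantiation. A worked example, assuming an instantiation with SSEVer = "2", OpcodeStr = "add", FPSizeStr = "_sd" (values chosen here only for illustration):

  // !strconcat("int_x86_sse", "2", "_", "add", "_sd") == "int_x86_sse2_add_sd"
  // !cast<Intrinsic>("int_x86_sse2_add_sd") then resolves to the record of
  // that name from IntrinsicsX86.td, so the pattern becomes:
  //   [(set RC:$dst, (int_x86_sse2_add_sd RC:$src1, RC:$src2))]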
@@ -142,17 +103,15 @@
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))], d>;
def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, (mem_frag addr:$src2)))], d>;
}
@@ -340,6 +299,15 @@
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
}
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -394,7 +362,7 @@
string asm_opr> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- !strconcat(!strconcat(base_opc,"s"), asm_opr),
+ !strconcat(base_opc, "s", asm_opr),
[(set RC:$dst,
(mov_frag RC:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
@@ -402,7 +370,7 @@
def PDrm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, f64mem:$src2),
- !strconcat(!strconcat(base_opc,"d"), asm_opr),
+ !strconcat(base_opc, "d", asm_opr),
[(set RC:$dst, (v2f64 (mov_frag RC:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, TB, OpSize;
@@ -589,14 +557,6 @@
// Conversion instruction intrinsics - match intrinsics that expect MM
// and/or XMM operand(s).
-multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
-}
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
@@ -609,16 +569,6 @@
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}
-multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
- RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm, Domain d> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src2), asm,
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
-}
-
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, bit Is2Addr = 1> {
@@ -660,13 +610,11 @@
f32mem, load, "cvtss2si">, XS;
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
f32mem, load, "cvtss2si{q}">, XS, REX_W;
-defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD;
-defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si{q}">, XD, REX_W;
-defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
- REX_W;
let isAsmParserOnly = 1 in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -696,29 +644,6 @@
"cvtsi2sd">, XD, REX_W;
}
-// Instructions below don't have an AVX form.
-defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
- f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
-defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
- f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
- f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
-defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
- f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
- i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-let Constraints = "$src1 = $dst" in {
- defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
- int_x86_sse_cvtpi2ps,
- i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
-}
-
/// SSE 1 Only
// Aliases for intrinsics
@@ -751,10 +676,10 @@
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
"cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
VEX_W;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
}
@@ -763,7 +688,7 @@
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
"cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
@@ -894,6 +819,7 @@
(bitconvert (memopv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+
// Convert packed single/double fp to doubleword
let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -963,9 +889,13 @@
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
let isAsmParserOnly = 1 in {
@@ -980,16 +910,6 @@
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
}
-def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))]>,
- XS, Requires<[HasSSE2]>;
-def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))]>,
- XS, Requires<[HasSSE2]>;
let isAsmParserOnly = 1 in {
def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
@@ -1003,13 +923,13 @@
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>, VEX;
}
-def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
-def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
let isAsmParserOnly = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
@@ -1117,6 +1037,39 @@
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
+// whenever possible to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+ (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTTPS2DQYrm addr:$src)>;
+
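The FIXME above contrasts two ways of hooking up an intrinsic: defining a second, near-identical "Int_"-prefixed instruction record whose pattern field carries the intrinsic call, versus defining the instruction once (with an empty pattern) and adding free-standing Pat lines like those above. A minimal sketch of the preferred shape, reusing one of the conversions above:

  // Instruction defined once, with no pattern:
  //   def VCVTDQ2PSYrr : ... "cvtdq2ps\t...", []>, TB, VEX;
  // Intrinsic bound separately, so no duplicate instruction record is needed:
  def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
            (VCVTDQ2PSYrr VR256:$src)>;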
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
@@ -1281,16 +1234,14 @@
"cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
SSEPackedDouble>, OpSize, VEX_4V;
- let Pattern = []<dag> in {
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- }
+ defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -1428,9 +1379,11 @@
/// sse12_extr_sign_mask - sse 1 & 2 packed FP sign mask extraction
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
- def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set GR32:$dst, (Int RC:$src))], d>;
+ def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
}
// Mask creation
@@ -1445,20 +1398,18 @@
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
"movmskpd", SSEPackedDouble>, OpSize,
VEX;
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
- // FIXME: merge with multiclass above when the intrinsics come.
+ // Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
VEX;
-
- def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
@@ -1643,6 +1594,9 @@
///
/// These three forms can each be reg+reg or reg+mem.
///
+
+/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
+/// classes below.
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -1682,14 +1636,24 @@
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
bit Is2Addr = 1> {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
+ !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
SSEPackedSingle, Is2Addr>, TB;
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64,
+ !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
SSEPackedDouble, Is2Addr>, TB, OpSize;
}
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+ SSEPackedSingle, 0>, TB;
+
+ defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+ SSEPackedDouble, 0>, TB, OpSize;
+}
+
// Binary Arithmetic instructions
let isAsmParserOnly = 1 in {
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
@@ -1714,11 +1678,13 @@
basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
}
@@ -1830,6 +1796,16 @@
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -1900,6 +1876,17 @@
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Square root.
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
@@ -1910,8 +1897,10 @@
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
- sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
VEX;
// Reciprocal approximations. Note that these typically require refinement
@@ -1920,12 +1909,14 @@
int_x86_sse_rsqrt_ss>, VEX_4V;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
VEX_4V;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}
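The "require refinement" note refers to the usual Newton-Raphson polish: the hardware rsqrtps/rcpps estimates give only about 12 bits of relative accuracy, and one iteration brings them close to full single precision. As a reference (standard numerics, not something this patch adds), with estimates y0 ~ 1/sqrt(x) and z0 ~ 1/x:

  y1 = y0 * (3 - x * y0^2) / 2      // one rsqrt refinement step
  z1 = z0 * (2 - x * z0)            // one rcp refinement step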
@@ -2015,6 +2006,13 @@
}
}
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+ (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+ (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
@@ -2083,7 +2081,9 @@
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below like
+// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in {
def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
@@ -2095,6 +2095,26 @@
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
+// The same as done above but for AVX. The 128-bit versions are the
+// same, but re-encoded. The 256-bit versions do not support a PI form.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+let ExeDomain = SSEPackedInt in
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
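For context on the two FIXMEs above: the intended end state is an encoding-less Pseudo that X86MCInstLower expands into the cheapest zeroing idiom for the current subtarget, instead of hard-coding the xorps/pxor encodings here. A minimal sketch of what such a pseudo could look like once the JIT limitation is resolved (hypothetical, not part of this patch):

  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
      isCodeGenOnly = 1 in
  def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
  // MC lowering would then pick xorps/vxorps/pxor when emitting V_SET0.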
def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
@@ -2214,6 +2234,10 @@
} // ExeDomain = SSEPackedInt
+def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
@@ -2897,6 +2921,13 @@
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
// Move Int Doubleword to Single Scalar
@@ -2939,6 +2970,21 @@
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
// Move Scalar Single to Double Int
let isAsmParserOnly = 1 in {
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
@@ -3181,13 +3227,13 @@
// Convert Packed DW Integers to Packed Double FP
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -3195,6 +3241,17 @@
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE3 - Move Instructions
//===---------------------------------------------------------------------===//
@@ -3260,10 +3317,11 @@
// Move Unaligned Integer
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vlddqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
@@ -3327,12 +3385,10 @@
f128mem, 0>, XD, VEX_4V;
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
f128mem, 0>, OpSize, VEX_4V;
- let Pattern = []<dag> in {
- defm VADDSUBPSY : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR256,
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
f256mem, 0>, XD, VEX_4V;
- defm VADDSUBPDY : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR256,
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
f256mem, 0>, OpSize, VEX_4V;
- }
}
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
ExeDomain = SSEPackedDouble in {
@@ -3377,24 +3433,22 @@
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- int_x86_sse3_hadd_ps, 0>, VEX_4V;
- defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- int_x86_sse3_hadd_pd, 0>, VEX_4V;
- defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- int_x86_sse3_hsub_ps, 0>, VEX_4V;
- defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- int_x86_sse3_hsub_pd, 0>, VEX_4V;
- let Pattern = []<dag> in {
- defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
int_x86_sse3_hadd_ps, 0>, VEX_4V;
- defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
int_x86_sse3_hadd_pd, 0>, VEX_4V;
- defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
int_x86_sse3_hsub_ps, 0>, VEX_4V;
- defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
int_x86_sse3_hsub_pd, 0>, VEX_4V;
- }
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ int_x86_avx_hsub_pd_256, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -3412,19 +3466,10 @@
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
+
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
-
+ PatFrag mem_frag128, Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3440,26 +3485,20 @@
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
int_x86_ssse3_pabs_d_128>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
- int_x86_ssse3_pabs_d_128>;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -3467,26 +3506,9 @@
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128,
+ PatFrag mem_frag128, Intrinsic IntId128,
bit Is2Addr = 1> {
let isCommutable = 1 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-
- let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -3506,84 +3528,60 @@
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
int_x86_ssse3_psign_d_128>;
}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
int_x86_ssse3_pmul_hr_sw_128>;
}
@@ -3596,22 +3594,7 @@
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass sse3_palign<string asm, bit Is2Addr = 1> {
- def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
- def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
-
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
@@ -3629,29 +3612,11 @@
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PALIGN : sse3_palign<"palignr">;
+ defm PALIGN : ssse3_palign<"palignr">;
let AddedComplexity = 5 in {
-
-def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-
def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
@@ -3925,38 +3890,65 @@
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
- (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
- (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
+ (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
-def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
-def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
-def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
@@ -4350,44 +4342,44 @@
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr,
- Intrinsic V4F32Int,
- Intrinsic V2F64Int> {
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
- def PSm_Int : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ def PSm : Ii8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
TA, OpSize,
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
- def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
- def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
}
@@ -4395,28 +4387,28 @@
RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr : SS4AIi8<opcps, MRMSrcReg,
+ def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
+ def PSm_AVX : Ii8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TA, OpSize, Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
- def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
- def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4428,7 +4420,7 @@
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
// Intrinsic operation, reg.
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4439,7 +4431,7 @@
OpSize;
// Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4451,7 +4443,7 @@
OpSize;
// Intrinsic operation, reg.
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4462,7 +4454,7 @@
OpSize;
// Intrinsic operation, mem.
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4477,28 +4469,28 @@
multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr> {
// Intrinsic operation, reg.
- def SSr : SS4AIi8<opcss, MRMSrcReg,
+ def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, mem.
- def SSm : SS4AIi8<opcss, MRMSrcMem,
+ def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, reg.
- def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, mem.
- def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -4508,12 +4500,18 @@
// FP round - roundss, roundps, roundsd, roundpd
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Intrinsic form
- defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
- VEX;
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd,
- 0>, VEX_4V;
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V;
+
// Instructions for the assembler
defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
VEX;
@@ -4522,7 +4520,8 @@
defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
}
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
@@ -4536,53 +4535,78 @@
// the Intel intrinsic that corresponds to this.
let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
- OpSize, VEX;
-def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize, VEX;
-def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
- OpSize, VEX;
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX;
}
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
OpSize;
-def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
OpSize;
}
// The bit test instructions below are AVX only
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop> {
- def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, OpSize, VEX;
- def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, OpSize, VEX;
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
}
let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
- defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem>;
- defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>;
- defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem>;
- defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>;
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
}
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
+def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+let mayLoad = 1 in
+def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+
+def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
+def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+
+def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
+def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+
+
+
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
Intrinsic IntId128> {
@@ -4729,12 +4753,10 @@
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
- let Pattern = []<dag> in {
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
- }
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
@@ -4744,9 +4766,8 @@
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
- let Pattern = []<dag> in
- defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -4795,13 +4816,10 @@
memopv16i8, int_x86_sse41_blendvps>;
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
memopv16i8, int_x86_sse41_pblendvb>;
-
-let Pattern = []<dag> in { // FIXME: implement 256 intrinsics here.
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
- memopv32i8, int_x86_sse41_blendvpd>;
+ memopv32i8, int_x86_avx_blendv_pd_256>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
- memopv32i8, int_x86_sse41_blendvps>;
-}
+ memopv32i8, int_x86_avx_blendv_ps_256>;
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@@ -5262,14 +5280,19 @@
// Load from memory and broadcast to all elements of the destination operand
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop> :
+ X86MemOperand x86memop, Intrinsic Int> :
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, VEX;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
-def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem>;
-def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem>;
-def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem>;
-def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem>;
+def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>;
// Insert packed floating-point values
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
@@ -5292,53 +5315,83 @@
[]>, VEX;
// Conditional SIMD Packed Loads and Stores
-multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr> {
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256,
+ PatFrag pf128, PatFrag pf256> {
def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V;
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V;
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V;
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V;
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V;
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}
-defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps">;
-defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd">;
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256,
+ memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256,
+ memopv2f64, memopv4f64>;
// Permute Floating-Point Values
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop> {
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+ Intrinsic IntVar, Intrinsic IntImm> {
def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
+ (ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX;
+ [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
+ (ins x86memop_f:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX;
+ [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
}
-defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem>;
-defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem>;
-defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem>;
-defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem>;
+defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -5350,11 +5403,403 @@
[]>, VEX_4V;
// Zero All YMM registers
-def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", []>, VEX, VEX_L,
- Requires<[HasAVX]>;
+def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
// Zero Upper bits of YMM registers
-def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", []>, VEX,
- Requires<[HasAVX]>;
+def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
} // isAsmParserOnly
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+ VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+ VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+ VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+//===----------------------------------------------------------------------===//
+// SSE Shuffle pattern fragments
+//===----------------------------------------------------------------------===//
+
+// This is part of a "work in progress" refactoring. The idea is that all
+// vector shuffles are going to be translated into target-specific nodes and
+// directly matched by the patterns below (which can be changed along the way).
+// The AVX versions of some, but not all, of them are described here, and more
+// should come in the near future.
+
+// Shuffle with PSHUFD instruction folding loads. The first two patterns match
+// SSE2 loads, which are always promoted to v2i64. The last one should match
+// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD
+// in SSE1, so how did this ever work? Anyway, the pattern will remain here
+// until we investigate further.
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
+
+// Shuffle with PSHUFD instruction.
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+// Shuffle with SHUFPD instruction.
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with SHUFPS instruction.
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with MOVHLPS instruction
+def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVDDUP instruction
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+
+
+// Shuffle with UNPCKLPS
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPS
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKLPD
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPD
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLBW
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
+ (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLWD
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
+ (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLDQ
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
+ (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLQDQ
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHBW
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
+ (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHWD
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
+ (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHDQ
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHQDQ
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVLHPS
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+// FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
+// Shuffle with MOVLHPD
+def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSS
+def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// FIXME: Instead of an X86Movss, there should be an X86Movlps here; the
+// problem is during lowering, where it's not possible to recognize the load
+// fold because it has two uses through a bitcast. One use disappears at isel
+// time and the fold opportunity reappears.
+def : Pat<(X86Movss VR128:$src1,
+ (bc_v4i32 (v2i64 (load addr:$src2)))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSD
+def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVSHDUP
+def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSHDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
+ (MOVSHDUPrm addr:$src)>;
+
+// Shuffle with MOVSLDUP
+def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSLDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
+ (MOVSLDUPrm addr:$src)>;
+
+// Shuffle with PSHUFHW
+def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PSHUFLW
+def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PALIGN
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86JITInfo.cpp Tue Oct 26 19:48:03 2010
@@ -337,7 +337,7 @@
// no support for inline assembly
static
#endif
-void ATTRIBUTE_USED
+void LLVM_ATTRIBUTE_USED
X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
assert(*RetAddrLoc == RetAddr &&
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCAsmInfo.cpp Tue Oct 26 19:48:03 2010
@@ -103,6 +103,9 @@
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64)
+ GlobalPrefix = "";
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
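
The hunk above clears GlobalPrefix on x86-64 COFF because, unlike i386 COFF,
64-bit Windows symbols are not decorated with a leading underscore. A minimal
standalone sketch of the effect (mangleGlobal is a hypothetical helper, not
the LLVM API):

  #include <string>

  // i386 COFF prefixes globals with '_'; x86-64 COFF does not, which is
  // exactly what clearing GlobalPrefix expresses.
  std::string mangleGlobal(const std::string &Name, bool IsX86_64COFF) {
    const char *GlobalPrefix = IsX86_64COFF ? "" : "_";
    return GlobalPrefix + Name;
  }
  // mangleGlobal("main", false) == "_main"   (i386 COFF)
  // mangleGlobal("main", true)  == "main"    (x86-64 COFF)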
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCCodeEmitter.cpp Tue Oct 26 19:48:03 2010
@@ -18,6 +18,7 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -38,7 +39,7 @@
~X86MCCodeEmitter() {}
unsigned getNumFixupKinds() const {
- return 5;
+ return 7;
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
@@ -47,7 +48,9 @@
{ "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
};
if (Kind < FirstTargetFixupKind)
@@ -179,6 +182,37 @@
}
}
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is needed to support
+/// PIC on ELF i386, as that symbol is magic. We check only the simple cases
+/// that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+ Expr = BE->getLHS();
+ }
+
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return false;
+
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbol &S = Ref->getSymbol();
+ return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
void X86MCCodeEmitter::
EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
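
StartsWithGlobalOffsetTable above peels only the left-hand side of a binary
expression, so it accepts _GLOBAL_OFFSET_TABLE_ by itself or as the leading
operand of a sum. A self-contained sketch of that walk, with ExprNode standing
in for MCExpr (a hypothetical stand-in, not the LLVM class):

  #include <string>

  struct ExprNode {
    enum Kind { Symbol, Binary } K;
    std::string Name;              // set when K == Symbol
    const ExprNode *LHS = nullptr; // set when K == Binary
  };

  // Accepts "_GLOBAL_OFFSET_TABLE_" and "_GLOBAL_OFFSET_TABLE_ + <anything>".
  static bool startsWithGOT(const ExprNode *E) {
    if (E->K == ExprNode::Binary) // look only at the leading operand,
      E = E->LHS;                 // mirroring the code above
    return E->K == ExprNode::Symbol && E->Name == "_GLOBAL_OFFSET_TABLE_";
  }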
@@ -196,6 +230,13 @@
// If we have an immoffset, add it to the expression.
const MCExpr *Expr = DispOp.getExpr();
+ if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ ImmOffset = CurByte;
+ }
+
// If the fixup is pc-relative, we need to bias the value to be relative to
// the start of the field, not the end of the field.
if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
@@ -221,10 +262,10 @@
uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const{
- const MCOperand &Disp = MI.getOperand(Op+3);
- const MCOperand &Base = MI.getOperand(Op);
- const MCOperand &Scale = MI.getOperand(Op+1);
- const MCOperand &IndexReg = MI.getOperand(Op+2);
+ const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
unsigned BaseReg = Base.getReg();
// Handle %rip relative addressing.
@@ -238,8 +279,7 @@
// movq loads are handled with a special relocation form which allows the
// linker to eliminate some loads for GOT references which end up in the
// same linkage unit.
- if (MI.getOpcode() == X86::MOV64rm ||
- MI.getOpcode() == X86::MOV64rm_TC)
+ if (MI.getOpcode() == X86::MOV64rm)
FixupKind = X86::reloc_riprel_4byte_movq_load;
// rip-relative addressing is actually relative to the *next* instruction.
@@ -295,7 +335,8 @@
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
return;
}
@@ -355,7 +396,8 @@
if (ForceDisp8)
EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
else if (ForceDisp32 || Disp.getImm() != 0)
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
}
/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
@@ -365,7 +407,7 @@
const TargetInstrDesc &Desc,
raw_ostream &OS) const {
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
// VEX_R: opcode extension equivalent to REX.R in
@@ -429,10 +471,10 @@
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
- if (TSFlags & X86II::VEX_W)
+ if ((TSFlags >> 32) & X86II::VEX_W)
VEX_W = 1;
- if (TSFlags & X86II::VEX_L)
+ if ((TSFlags >> 32) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
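
The repeated (TSFlags >> 32) edits in this file follow from the VEX-related
bits living in the upper half of the 64-bit TSFlags word; testing without the
shift would silently read an unrelated bit from the low half. A minimal
sketch, with placeholder bit positions rather than the real X86II encoding:

  #include <cstdint>

  enum : uint32_t { VEX = 1u << 0, VEX_W = 1u << 1,
                    VEX_4V = 1u << 2, VEX_L = 1u << 3 }; // placeholders

  static bool hasVEX_L(uint64_t TSFlags) {
    // Wrong form: `TSFlags & VEX_L` tests a bit in the low 32 bits.
    return (TSFlags >> 32) & VEX_L;
  }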
@@ -501,7 +543,7 @@
// If the last register should be encoded in the immediate field
// do not use any bit from VEX prefix to this register, ignore it
- if (TSFlags & X86II::VEX_I8IMM)
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM)
NumOps--;
for (; CurOp != NumOps; ++CurOp) {
@@ -708,14 +750,15 @@
if ((TSFlags & X86II::Op0Mask) == X86II::REP)
EmitByte(0xF3, CurByte, OS);
+ // Emit the address size opcode prefix as needed.
+ if ((TSFlags & X86II::AdSize) ||
+ (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+ EmitByte(0x67, CurByte, OS);
+
// Emit the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
EmitByte(0x66, CurByte, OS);
- // Emit the address size opcode prefix as needed.
- if (TSFlags & X86II::AdSize)
- EmitByte(0x67, CurByte, OS);
-
bool Need0FPrefix = false;
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
@@ -801,11 +844,12 @@
// It uses the VEX.VVVV field?
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX)
+ if ((TSFlags >> 32) & X86II::VEX)
HasVEXPrefix = true;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
+
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -815,7 +859,12 @@
else
EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+
+ if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+
unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
@@ -827,6 +876,21 @@
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
+
+ case X86II::RawFrmImm8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ break;
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ break;
case X86II::AddRegFrm:
EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
@@ -947,7 +1011,7 @@
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
- if (TSFlags & X86II::VEX_I8IMM) {
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
bool IsExtReg =
X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
@@ -955,12 +1019,21 @@
RegNum |= GetX86RegNum(MO) << 4;
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
Fixups);
- } else
+ } else {
+ unsigned FixupKind;
+ if (MI.getOpcode() == X86::MOV64ri32 || MI.getOpcode() == X86::MOV64mi32)
+ FixupKind = X86::reloc_signed_4byte;
+ else
+ FixupKind = getImmFixupKind(TSFlags);
EmitImmediate(MI.getOperand(CurOp++),
- X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
CurByte, OS, Fixups);
+ }
}
+ if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+
#ifndef NDEBUG
// FIXME: Verify.
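
The Has3DNow0F0FOpcode handling added above reflects the unusual 3DNow!
scheme: the instruction is emitted as 0F 0F <operands> with the real opcode
trailing as the final byte, which is why BaseOpcode is overridden to 0x0F and
getBaseOpcodeFor(TSFlags) is emitted again after the operands. A freestanding
sketch of the byte layout (illustrative only, not the emitter's API):

  #include <cstdint>
  #include <vector>

  std::vector<uint8_t> encode3DNow(uint8_t RealOpcode,
                                   const std::vector<uint8_t> &Operands) {
    std::vector<uint8_t> Out = {0x0F, 0x0F};           // two-byte escape
    Out.insert(Out.end(), Operands.begin(), Operands.end());
    Out.push_back(RealOpcode);                         // suffix opcode byte
    return Out;
  }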
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86MCInstLower.cpp Tue Oct 26 19:48:03 2010
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
@@ -252,7 +253,13 @@
}
/// \brief Simplify things like MOV32rm to MOV32o32a.
-static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
unsigned AddrBase = IsStore;
unsigned RegOp = IsStore ? 0 : 5;
@@ -341,6 +348,7 @@
}
// Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
switch (OutMI.getOpcode()) {
case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
lower_lea64_32mem(&OutMI, 1);
@@ -371,15 +379,17 @@
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
- case X86::MMX_V_SETALLONES:
- LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
- case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
- case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
+ case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+ case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
+ case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+ case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -390,12 +400,14 @@
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
// register inputs modeled as normal uses instead of implicit uses. As such,
// truncate off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
case X86::CALL64r:
- case X86::CALL64pcrel32: {
+ case X86::CALL64pcrel32:
+ case X86::WINCALL64r:
+ case X86::WINCALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
@@ -404,6 +416,13 @@
break;
}
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::RET);
+ break;
+ }
+
// TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
case X86::TAILJMPr:
case X86::TAILJMPd:
@@ -423,6 +442,19 @@
break;
}
+ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
+ // this with an ugly goto in case the resultant OR uses EAX and needs the
+ // short form.
+ case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+ case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+ case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+ case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+ case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+ case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+ case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+ case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+ case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -453,15 +485,13 @@
// MOV64ao8, MOV64o8a
// XCHG16ar, XCHG32ar, XCHG64ar
case X86::MOV8mr_NOREX:
- case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
case X86::MOV8rm_NOREX:
- case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
- case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
- case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
- case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
- case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
- case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
- case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
@@ -515,6 +545,21 @@
}
return;
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ unsigned Reg = MI->getOperand(0).getReg();
+ OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64:
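
The ReSimplify label introduced above lets a rewritten pseudo re-enter the
lowering switch, so an ADD*_DB pseudo that becomes a plain OR can still pick
up later simplifications such as the EAX short forms. A compact sketch of the
pattern, with placeholder opcodes rather than the real X86 enum:

  enum Opcode { ADD32ri_DB, OR32ri, OR32i32 /* placeholders */ };

  void lowerOpcode(Opcode &Op, bool DestIsEAX) {
  ReSimplify:
    switch (Op) {
    case ADD32ri_DB:   // ADD with provably disjoint bits: emit a real OR
      Op = OR32ri;
      goto ReSimplify; // let the OR reach the short-form case below
    case OR32ri:
      if (DestIsEAX)
        Op = OR32i32;  // hypothetical EAX short form
      break;
    default:
      break;
    }
  }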
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -38,8 +38,15 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
: X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
@@ -152,46 +159,21 @@
case X86::YMM7: case X86::YMM15: case X86::MM7:
return 7;
- case X86::ES:
- return 0;
- case X86::CS:
- return 1;
- case X86::SS:
- return 2;
- case X86::DS:
- return 3;
- case X86::FS:
- return 4;
- case X86::GS:
- return 5;
-
- case X86::CR0:
- return 0;
- case X86::CR1:
- return 1;
- case X86::CR2:
- return 2;
- case X86::CR3:
- return 3;
- case X86::CR4:
- return 4;
-
- case X86::DR0:
- return 0;
- case X86::DR1:
- return 1;
- case X86::DR2:
- return 2;
- case X86::DR3:
- return 3;
- case X86::DR4:
- return 4;
- case X86::DR5:
- return 5;
- case X86::DR6:
- return 6;
- case X86::DR7:
- return 7;
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
// Pseudo index registers are equivalent to a "none"
// scaled index (See Intel Manual 2A, table 2-3)
@@ -288,9 +270,14 @@
}
break;
case X86::sub_32bit:
- if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
+ if (B == &X86::GR32RegClass) {
if (A->getSize() == 8)
return A;
+ } else if (B == &X86::GR32_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+ return &X86::GR64_NOSPRegClass;
+ if (A->getSize() == 8)
+ return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
} else if (B == &X86::GR32_ABCDRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
A == &X86::GR64_NOREXRegClass ||
@@ -471,7 +458,11 @@
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
-
+
+ // If we've requested that we force align the stack do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
return requiresRealignment && canRealignStack(MF);
}
@@ -615,10 +606,9 @@
MBB.erase(I);
}
-unsigned
+void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
+ int SPAdj, RegScavenger *RS) const{
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -665,7 +655,6 @@
uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
MI.getOperand(i+3).setOffset(Offset);
}
- return 0;
}
void
@@ -755,7 +744,7 @@
}
}
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions below the iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -887,6 +876,19 @@
}
}
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -906,6 +908,17 @@
bool HasFP = hasFP(MF);
DebugLoc DL;
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info; we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
@@ -920,12 +933,12 @@
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- !Subtarget->isTargetWin64()) { // Win64 has no Red Zone
+ !IsWin64) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
MFI->setStackSize(StackSize);
- } else if (Subtarget->isTargetWin64()) {
+ } else if (IsWin64) {
// We need to always allocate 32 bytes as register spill area.
// FIXME: We might reuse these 32 bytes for leaf functions.
StackSize += 32;
@@ -1066,28 +1079,40 @@
DL = MBB.findDebugLoc(MBBI);
+ // If there is a SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
// Adjust stack pointer: ESP -= numbytes.
- if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
- // Check, whether EAX is livein for this function.
- bool isEAXAlive = false;
- for (MachineRegisterInfo::livein_iterator
- II = MF.getRegInfo().livein_begin(),
- EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
- unsigned Reg = II->first;
- isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL);
- }
- // Function prologue calls _alloca to probe the stack when allocating more
- // than 4k bytes in one go. Touching the stack at 4K increments is necessary
- // to ensure that the guard pages used by the OS virtual memory manager are
- // allocated in correct sequence.
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in correct sequence.
+ if (NumBytes >= 4096 &&
+ (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+ unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
@@ -1097,9 +1122,10 @@
// allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
@@ -1107,19 +1133,21 @@
StackPtr, false, NumBytes - 4);
MBB.insert(MBBI, MI);
}
- } else if (NumBytes) {
- // If there is an SUB32ri of ESP immediately before this instruction, merge
- // the two. This can be the case when tail call elimination is enabled and
- // the callee has more arguments then the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
-
- // If there is an ADD32ri or SUB32ri of ESP immediately after this
- // instruction, merge the two instructions.
- mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
- }
+ } else if (NumBytes >= 4096 && Subtarget->isTargetWin64()) {
+ // Sanity check that EAX is not livein for this function. It should
+ // not be, so throw an assert.
+ assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
+
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
+ .addExternalSymbol("__chkstk")
+ .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+ } else if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
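
As an aside, the reason for the 4K-increment touching described in the
comment above can be shown with a tiny standalone sketch (hypothetical
code, not the actual __chkstk/__alloca implementation):

  #include <cstddef>

  // The OS commits stack pages lazily behind a single guard page. Touching
  // one byte per 4K page, in descending order, makes each access fault on
  // the guard page so the next page gets committed. Jumping straight to the
  // bottom of a large allocation would fault past the guard page and crash.
  static void probeStack(volatile char *SP, unsigned NumBytes) {
    for (unsigned Off = 4096; Off <= NumBytes; Off += 4096)
      SP[-(std::ptrdiff_t)Off] = 0; // touch each page going down
  }

The real helpers additionally adjust the stack pointer (32-bit) or leave
that to the prologue (64-bit), as the comment above notes.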
if ((NumBytes || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
@@ -1177,6 +1205,17 @@
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info; we need to know the ABI stack alignment as well in case we have
+ // a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -1524,7 +1563,7 @@
namespace {
struct MSAH : public MachineFunctionPass {
static char ID;
- MSAH() : MachineFunctionPass(&ID) {}
+ MSAH() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -126,9 +126,8 @@
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86RegisterInfo.td Tue Oct 26 19:48:03 2010
@@ -231,7 +231,7 @@
def DR6 : Register<"dr6">;
def DR7 : Register<"dr7">;
- // Condition registers
+ // Control registers
def CR0 : Register<"cr0">;
def CR1 : Register<"cr1">;
def CR2 : Register<"cr2">;
@@ -241,6 +241,13 @@
def CR6 : Register<"cr6">;
def CR7 : Register<"cr7">;
def CR8 : Register<"cr8">;
+ def CR9 : Register<"cr9">;
+ def CR10 : Register<"cr10">;
+ def CR11 : Register<"cr11">;
+ def CR12 : Register<"cr12">;
+ def CR13 : Register<"cr13">;
+ def CR14 : Register<"cr14">;
+ def CR15 : Register<"cr15">;
// Pseudo index registers
def EIZ : Register<"eiz">;
@@ -456,7 +463,8 @@
// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64,
- [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
+ [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
+ CR9, CR10, CR11, CR12, CR13, CR14, CR15]> {
}
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
@@ -784,7 +792,7 @@
}
// Generic vector registers: VR64 and VR128.
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
+def VR64: RegisterClass<"X86", [x86mmx], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
@@ -833,4 +841,15 @@
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
let CopyCost = -1; // Don't allow copying of status registers.
+
+ // EFLAGS is not allocatable.
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CCRClass::iterator
+ CCRClass::allocation_order_end(const MachineFunction &MF) const {
+ return allocation_order_begin(MF);
+ }
+ }];
}
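
A note on the MethodBodies trick above: making allocation_order_end()
return allocation_order_begin() hands the register allocator an empty
candidate range, so EFLAGS can never be assigned to a virtual register;
it only ever appears as an explicitly referenced physical register. A
minimal sketch of the effect (hypothetical names):

  // An empty [begin, end) allocation order means "nothing to hand out";
  // the class still exists for copies and explicit uses of EFLAGS.
  template <typename Iter>
  static bool classHasAllocatableRegs(Iter Begin, Iter End) {
    return Begin != End; // false for CCRClass after this change
  }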
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.cpp Tue Oct 26 19:48:03 2010
@@ -32,10 +32,13 @@
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const {
+ MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ // If storing to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
@@ -161,7 +164,7 @@
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstSV, DstSVOff + Offset);
+ Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
@@ -173,10 +176,8 @@
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
// This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -190,6 +191,11 @@
if ((Align & 3) != 0)
return SDValue();
+ // If either pointer is in a segment-relative address space, use the
+ // default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256 ||
+ SrcPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
// DWORD aligned
EVT AVT = MVT::i32;
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
@@ -234,8 +240,8 @@
DAG.getConstant(Offset, SrcVT)),
DAG.getConstant(BytesLeft, SizeVT),
Align, isVolatile, AlwaysInline,
- DstSV, DstSVOff + Offset,
- SrcSV, SrcSVOff + Offset));
+ DstPtrInfo.getWithOffset(Offset),
+ SrcPtrInfo.getWithOffset(Offset)));
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
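
For context on the new getAddrSpace() checks: by X86 convention in LLVM,
address spaces 256 and up denote segment-relative pointers (256 = GS,
257 = FS), and the inline rep-string expansion has no way to encode the
required segment override, so those cases must fall back to the generic
lowering. A trivial restatement of the guard, as a sketch:

  // Returning true requests the target-independent lowering (which ends
  // up as a libc call or an expanded loop that can carry the segment).
  static bool needsGenericLowering(unsigned AddrSpace) {
    return AddrSpace >= 256; // segment-relative by X86 convention
  }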
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86SelectionDAGInfo.h Tue Oct 26 19:48:03 2010
@@ -39,8 +39,7 @@
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const;
+ MachinePointerInfo DstPtrInfo) const;
virtual
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
@@ -48,10 +47,8 @@
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
};
}
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.cpp Tue Oct 26 19:48:03 2010
@@ -73,7 +73,7 @@
if (GV->hasDefaultVisibility() &&
(isDecl || GV->isWeakForLinker()))
return X86II::MO_GOTPCREL;
- } else {
+ } else if (!isTargetWin64()) {
assert(isTargetELF() && "Unknown rip-relative target");
// Extra load is needed for all externally visible.
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86Subtarget.h Tue Oct 26 19:48:03 2010
@@ -186,18 +186,8 @@
return Is64Bit && (isTargetMingw() || isTargetWindows());
}
- std::string getDataLayout() const {
- const char *p;
- if (is64Bit())
- p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
- else if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
- else if (isTargetMingw() || isTargetWindows())
- p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32";
- else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
-
- return std::string(p);
+ bool isTargetWin32() const {
+ return !Is64Bit && (isTargetMingw() || isTargetWindows());
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.cpp Tue Oct 26 19:48:03 2010
@@ -46,10 +46,15 @@
bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
+ case Triple::Darwin:
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ case Triple::MinGW32:
+ case Triple::MinGW64:
+ case Triple::Cygwin:
case Triple::Win32:
return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
default:
- return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
}
}
@@ -84,13 +89,28 @@
X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, false) {
+ : X86TargetMachine(T, TT, FS, false),
+ DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+ (getSubtargetImpl()->isTargetCygMing() ||
+ getSubtargetImpl()->isTargetWindows()) ?
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
}
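
For reference, the target data strings above decode as follows (taking
the Darwin 32-bit string; this is the standard TargetData syntax):

  e            little-endian
  p:32:32      pointers are 32 bits wide with 32-bit ABI alignment
  f64:32:64    f64 has 32-bit ABI alignment, 64-bit preferred alignment
  i64:32:64    i64 has 32-bit ABI alignment, 64-bit preferred alignment
  f80:128:128  x87 long double has 128-bit ABI and preferred alignment
  n8:16:32     native integer widths are 8, 16 and 32 bits

The Windows/CygMing string differs only in giving f64/i64 full 64-bit
ABI alignment and f80 32-bit alignment, matching the MSVC ABI.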
X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, true) {
+ : X86TargetMachine(T, TT, FS, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
}
/// X86TargetMachine ctor - Create an X86 target.
@@ -99,23 +119,27 @@
const std::string &FS, bool is64Bit)
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64Bit),
- DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(),
(Subtarget.isTargetWin64() ? -40 :
(Subtarget.is64Bit() ? -8 : -4))),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
- ELFWriterInfo(*this) {
+ ELFWriterInfo(is64Bit, true) {
DefRelocModel = getRelocationModel();
-
+
// If no relocation model was picked, default as appropriate for the target.
if (getRelocationModel() == Reloc::Default) {
- if (!Subtarget.isTargetDarwin())
- setRelocationModel(Reloc::Static);
- else if (Subtarget.is64Bit())
+ // Darwin defaults to PIC in 64-bit mode and dynamic-no-pic in 32-bit mode.
+ // Win64 requires rip-relative addressing, thus we force it to PIC.
+ // Otherwise we use the static relocation model by default.
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
+ } else if (Subtarget.isTargetWin64())
setRelocationModel(Reloc::PIC_);
else
- setRelocationModel(Reloc::DynamicNoPIC);
+ setRelocationModel(Reloc::Static);
}
assert(getRelocationModel() != Reloc::Default &&
@@ -138,29 +162,27 @@
Subtarget.isTargetDarwin() &&
is64Bit)
setRelocationModel(Reloc::PIC_);
-
+
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64-bit mode is always rip-relative.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
} else if (Subtarget.isTargetCygMing()) {
Subtarget.setPICStyle(PICStyles::None);
} else if (Subtarget.isTargetDarwin()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else if (getRelocationModel() == Reloc::PIC_)
+ if (getRelocationModel() == Reloc::PIC_)
Subtarget.setPICStyle(PICStyles::StubPIC);
else {
assert(getRelocationModel() == Reloc::DynamicNoPIC);
Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
}
} else if (Subtarget.isTargetELF()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else
- Subtarget.setPICStyle(PICStyles::GOT);
+ Subtarget.setPICStyle(PICStyles::GOT);
}
-
+
// Finally, if we have "none" as our PIC style, force to static mode.
if (Subtarget.getPICStyle() == PICStyles::None)
setRelocationModel(Reloc::Static);
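
To summarize the reworked defaulting above as a decision table (no new
behavior, just the branches as written):

  relocation model:  Darwin/64 -> PIC;  Darwin/32 -> DynamicNoPIC;
                     Win64 -> PIC (rip-relative addressing requires it);
                     everything else -> Static
  PIC style:         Static -> None;  any 64-bit PIC -> RIPRel;
                     CygMing -> None (then forced back to Static);
                     Darwin/32 -> StubPIC or StubDynamicNoPIC;
                     32-bit ELF -> GOT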
Modified: llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h (original)
+++ llvm/branches/wendling/eh/lib/Target/X86/X86TargetMachine.h Tue Oct 26 19:48:03 2010
@@ -31,12 +31,7 @@
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
TargetFrameInfo FrameInfo;
- X86InstrInfo InstrInfo;
- X86JITInfo JITInfo;
- X86TargetLowering TLInfo;
- X86SelectionDAGInfo TSInfo;
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -49,20 +44,23 @@
X86TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64Bit);
- virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ llvm_unreachable("getInstrInfo not implemented");
+ }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
- virtual X86JITInfo *getJITInfo() { return &JITInfo; }
+ virtual X86JITInfo *getJITInfo() {
+ llvm_unreachable("getJITInfo not implemented");
+ }
virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
- virtual const X86TargetLowering *getTargetLowering() const {
- return &TLInfo;
+ virtual const X86TargetLowering *getTargetLowering() const {
+ llvm_unreachable("getTargetLowering not implemented");
}
virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
- return &TSInfo;
+ llvm_unreachable("getSelectionDAGInfo not implemented");
}
virtual const X86RegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86ELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
@@ -79,17 +77,53 @@
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, const std::string &M,
const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
};
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
};
} // End llvm namespace
Modified: llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -10,7 +10,7 @@
tablegen(XCoreGenCallingConv.inc -gen-callingconv)
tablegen(XCoreGenSubtarget.inc -gen-subtarget)
-add_llvm_target(XCore
+add_llvm_target(XCoreCodeGen
XCoreFrameInfo.cpp
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
Modified: llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/TargetInfo/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -4,4 +4,4 @@
XCoreTargetInfo.cpp
)
-add_dependencies(LLVMXCoreInfo XCoreTable_gen)
+add_dependencies(LLVMXCoreInfo XCoreCodeGenTable_gen)
Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -56,13 +56,21 @@
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
+
// Complex Pattern Selectors.
- bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
- SDValue &Offset);
- bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base,
- SDValue &Offset);
- bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base,
- SDValue &Offset);
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual const char *getPassName() const {
return "XCore DAG->DAG Pattern Instruction Selection";
@@ -80,8 +88,8 @@
return new XCoreDAGToDAGISel(TM);
}
-bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
FrameIndexSDNode *FIN = 0;
if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -102,8 +110,8 @@
return false;
}
-bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(0, MVT::i32);
@@ -123,8 +131,8 @@
return false;
}
-bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(0, MVT::i32);
@@ -151,17 +159,15 @@
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
- if (Predicate_immMskBitp(N)) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
// Transformation function: get the size of a mask
- int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue();
- assert(isMask_32(MaskVal));
// Look for the first non-zero bit
- SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal));
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
MVT::i32, MskSize);
}
- else if (! Predicate_immU16(N)) {
- unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ else if (!isUInt<16>(Val)) {
SDValue CPIdx =
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
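
To make the new immMskBitp predicate concrete, here is a self-contained
sketch with worked values (hypothetical stand-ins for isMask_32 and
CountLeadingZeros_32 from llvm/Support/MathExtras.h, using the GCC/Clang
__builtin_clz):

  #include <cassert>
  #include <cstdint>

  static bool isMask32(uint32_t V) { return V && ((V + 1) & V) == 0; }
  static int clz32(uint32_t V) { return V ? __builtin_clz(V) : 32; }

  // Accept masks of width 1-8, 16, 24 or 32 - the immediates the
  // surrounding code pairs with XCore::MKMSK_rus.
  static bool immMskBitpSketch(uint32_t Value) {
    if (!isMask32(Value))
      return false;
    int MskSize = 32 - clz32(Value);
    return (MskSize >= 1 && MskSize <= 8) ||
           MskSize == 16 || MskSize == 24 || MskSize == 32;
  }

  int main() {
    assert(immMskBitpSketch(0x000000FF));  // 8-bit mask  -> accepted
    assert(immMskBitpSketch(0x00FFFFFF));  // 24-bit mask -> accepted
    assert(!immMskBitpSketch(0x00000FFF)); // 12-bit mask -> rejected
    assert(!immMskBitpSketch(0x000000F0)); // not a mask  -> rejected
  }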
Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -392,24 +392,23 @@
}
SDValue XCoreTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const
-{
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(Op);
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Unexpected extension type");
assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
- if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
return SDValue();
- }
+
unsigned ABIAlignment = getTargetData()->
getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
// Leave aligned load alone.
- if (LD->getAlignment() >= ABIAlignment) {
+ if (LD->getAlignment() >= ABIAlignment)
return SDValue();
- }
+
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue Base;
int64_t Offset;
@@ -419,10 +418,8 @@
// We've managed to infer better alignment information than the load
// already has. Use an aligned load.
//
- // FIXME: No new alignment information is actually passed here.
- // Should the offset really be 4?
- //
- return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4,
+ return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+ MachinePointerInfo(),
false, false, 0);
}
// Lower to
@@ -436,40 +433,40 @@
SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
- SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
- SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
+ SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
- SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
- LowAddr, NULL, 4, false, false, 0);
- SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
- HighAddr, NULL, 4, false, false, 0);
- SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
- SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+ SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+ LowAddr, MachinePointerInfo(), false, false, 0);
+ SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+ HighAddr, MachinePointerInfo(), false, false, 0);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, 2, DL);
}
if (LD->getAlignment() == 2) {
- int SVOffset = LD->getSrcValueOffset();
- SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain,
- BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, DL, Chain,
+ BasePtr, LD->getPointerInfo(), MVT::i16,
LD->isVolatile(), LD->isNonTemporal(), 2);
- SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
- SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain,
- HighAddr, LD->getSrcValue(), SVOffset + 2,
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, DL, Chain,
+ HighAddr,
+ LD->getPointerInfo().getWithOffset(2),
MVT::i16, LD->isVolatile(),
LD->isNonTemporal(), 2);
- SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
DAG.getConstant(16, MVT::i32));
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, 2, DL);
}
// Lower to a call to __misaligned_load(BasePtr).
@@ -486,12 +483,12 @@
false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- Args, DAG, dl);
+ Args, DAG, DL);
SDValue Ops[] =
{ CallResult.first, CallResult.second };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, 2, DL);
}
SDValue XCoreTargetLowering::
@@ -515,18 +512,17 @@
DebugLoc dl = Op.getDebugLoc();
if (ST->getAlignment() == 2) {
- int SVOffset = ST->getSrcValueOffset();
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
DAG.getConstant(16, MVT::i32));
SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
- ST->getSrcValue(), SVOffset, MVT::i16,
+ ST->getPointerInfo(), MVT::i16,
ST->isVolatile(), ST->isNonTemporal(),
2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
- ST->getSrcValue(), SVOffset + 2,
+ ST->getPointerInfo().getWithOffset(2),
MVT::i16, ST->isVolatile(),
ST->isNonTemporal(), 2);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
@@ -757,16 +753,18 @@
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(1), V, 0, false, false, 0);
+ Node->getOperand(1), MachinePointerInfo(V),
+ false, false, 0);
// Increment the pointer, VAList, to the next vararg
SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
DAG.getConstant(VT.getSizeInBits(),
getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0,
- false, false, 0);
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+ MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
- return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0);
+ return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -778,9 +776,8 @@
MachineFunction &MF = DAG.getMachineFunction();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
- const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
+ MachinePointerInfo(), false, false, 0);
}
SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -1079,7 +1076,8 @@
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0,
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -1111,8 +1109,8 @@
RegInfo.addLiveIn(ArgRegs[i], VReg);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
// Move argument from virt reg -> stack
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
}
if (!MemOps.empty())
@@ -1443,9 +1441,8 @@
return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
LD->getBasePtr(),
DAG.getConstant(StoreBits/8, MVT::i32),
- Alignment, false, ST->getSrcValue(),
- ST->getSrcValueOffset(), LD->getSrcValue(),
- LD->getSrcValueOffset());
+ Alignment, false, ST->getPointerInfo(),
+ LD->getPointerInfo());
}
}
break;
Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreInstrInfo.td Tue Oct 26 19:48:03 2010
@@ -140,17 +140,7 @@
return (uint32_t)N->getZExtValue() < (1 << 20);
}]>;
-def immMskBitp : PatLeaf<(imm), [{
- uint32_t value = (uint32_t)N->getZExtValue();
- if (!isMask_32(value)) {
- return false;
- }
- int msksize = 32 - CountLeadingZeros_32(value);
- return (msksize >= 1 && msksize <= 8)
- || msksize == 16
- || msksize == 24
- || msksize == 32;
-}]>;
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
def immBitp : PatLeaf<(imm), [{
uint32_t value = (uint32_t)N->getZExtValue();
Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.cpp Tue Oct 26 19:48:03 2010
@@ -155,10 +155,9 @@
MBB.erase(I);
}
-unsigned
+void
XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
@@ -291,7 +290,6 @@
}
// Erase old instruction.
MBB.erase(II);
- return 0;
}
void
Modified: llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/XCore/XCoreRegisterInfo.h Tue Oct 26 19:48:03 2010
@@ -54,9 +54,8 @@
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
Modified: llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Hello/Hello.cpp Tue Oct 26 19:48:03 2010
@@ -25,7 +25,7 @@
// Hello - The first implementation, without getAnalysisUsage.
struct Hello : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- Hello() : FunctionPass(&ID) {}
+ Hello() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
++HelloCounter;
@@ -37,13 +37,13 @@
}
char Hello::ID = 0;
-INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false);
+static RegisterPass<Hello> X("hello", "Hello World Pass");
namespace {
// Hello2 - The second implementation with getAnalysisUsage implemented.
struct Hello2 : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- Hello2() : FunctionPass(&ID) {}
+ Hello2() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
++HelloCounter;
@@ -60,6 +60,5 @@
}
char Hello2::ID = 0;
-INITIALIZE_PASS(Hello2, "hello2",
- "Hello World Pass (with getAnalysisUsage implemented)",
- false, false);
+static RegisterPass<Hello2>
+Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/ArgumentPromotion.cpp Tue Oct 26 19:48:03 2010
@@ -67,7 +67,9 @@
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(&ID), maxElements(maxElements) {}
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +86,12 @@
}
char ArgPromotion::ID = 0;
-INITIALIZE_PASS(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false);
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
@@ -445,7 +451,7 @@
const PointerType *LoadTy =
cast<PointerType>(Load->getPointerOperand()->getType());
- unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
+ uint64_t LoadSize = TD->getTypeStoreSize(LoadTy->getElementType());
if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
return false; // Pointer is invalidated!
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -23,5 +23,3 @@
StripSymbols.cpp
StructRetPromotion.cpp
)
-
-target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine)
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/ConstantMerge.cpp Tue Oct 26 19:48:03 2010
@@ -19,10 +19,12 @@
#define DEBUG_TYPE "constmerge"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -31,7 +33,9 @@
namespace {
struct ConstantMerge : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- ConstantMerge() : ModulePass(&ID) {}
+ ConstantMerge() : ModulePass(ID) {
+ initializeConstantMergePass(*PassRegistry::getPassRegistry());
+ }
// run - For this pass, process all of the globals in the module,
// eliminating duplicate constants.
@@ -42,11 +46,31 @@
char ConstantMerge::ID = 0;
INITIALIZE_PASS(ConstantMerge, "constmerge",
- "Merge Duplicate Global Constants", false, false);
+ "Merge Duplicate Global Constants", false, false)
ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
bool ConstantMerge::runOnModule(Module &M) {
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
// Map unique constant/section pairs to globals. We don't want to merge
// globals in different sections.
DenseMap<Constant*, GlobalVariable*> CMap;
@@ -79,9 +103,13 @@
// Only process constants with initializers in the default address space.
if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty())
+ GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
continue;
+
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/DeadArgumentElimination.cpp Tue Oct 26 19:48:03 2010
@@ -122,11 +122,13 @@
protected:
// DAH uses this to specify a different ID.
- explicit DAE(void *ID) : ModulePass(ID) {}
+ explicit DAE(char &ID) : ModulePass(ID) {}
public:
static char ID; // Pass identification, replacement for typeid
- DAE() : ModulePass(&ID) {}
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
@@ -151,7 +153,7 @@
char DAE::ID = 0;
-INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
namespace {
/// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -159,7 +161,7 @@
/// by bugpoint.
struct DAH : public DAE {
static char ID;
- DAH() : DAE(&ID) {}
+ DAH() : DAE(ID) {}
virtual bool ShouldHackArguments() const { return true; }
};
@@ -168,7 +170,7 @@
char DAH::ID = 0;
INITIALIZE_PASS(DAH, "deadarghaX0r",
"Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
- false, false);
+ false, false)
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
@@ -797,7 +799,8 @@
} else if (New->getType()->isVoidTy()) {
// Our return value has uses, but they will get removed later on.
// Replace by null for now.
- Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ if (!Call->getType()->isX86_MMXTy())
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
assert(RetTy->isStructTy() &&
"Return type changed, but not into a void. The old return type"
@@ -860,7 +863,8 @@
} else {
// If this argument is dead, replace any uses of it with null constants
// (these are guaranteed to become unused later on).
- I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ if (!I->getType()->isX86_MMXTy())
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
}
// If we change the return value of the function we must rewrite any return
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/DeadTypeElimination.cpp Tue Oct 26 19:48:03 2010
@@ -26,7 +26,9 @@
namespace {
struct DTE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- DTE() : ModulePass(&ID) {}
+ DTE() : ModulePass(ID) {
+ initializeDTEPass(*PassRegistry::getPassRegistry());
+ }
// doPassInitialization - For this pass, it removes global symbol table
// entries for primitive types. These are never used for linking in GCC and
@@ -45,7 +47,10 @@
}
char DTE::ID = 0;
-INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
+INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
ModulePass *llvm::createDeadTypeEliminationPass() {
return new DTE();
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/ExtractGV.cpp Tue Oct 26 19:48:03 2010
@@ -17,15 +17,15 @@
#include "llvm/Pass.h"
#include "llvm/Constants.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
#include <algorithm>
using namespace llvm;
namespace {
/// @brief A pass to extract specific functions and their dependencies.
class GVExtractorPass : public ModulePass {
- std::vector<GlobalValue*> Named;
+ SetVector<GlobalValue *> Named;
bool deleteStuff;
- bool reLink;
public:
static char ID; // Pass identification, replacement for typeid
@@ -33,133 +33,38 @@
/// specified function. Otherwise, it deletes as much of the module as
/// possible, except for the function specified.
///
- explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true,
- bool relinkCallees = false)
- : ModulePass(&ID), Named(GVs), deleteStuff(deleteS),
- reLink(relinkCallees) {}
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
bool runOnModule(Module &M) {
- if (Named.size() == 0) {
- return false; // Nothing to extract
- }
-
-
- if (deleteStuff)
- return deleteGV();
- M.setModuleInlineAsm("");
- return isolateGV(M);
- }
-
- bool deleteGV() {
- for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
- GE = Named.end(); GI != GE; ++GI) {
- if (Function* NamedFunc = dyn_cast<Function>(*GI)) {
- // If we're in relinking mode, set linkage of all internal callees to
- // external. This will allow us extract function, and then - link
- // everything together
- if (reLink) {
- for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end();
- B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), E = B->end();
- I != E; ++I) {
- if (CallInst* callInst = dyn_cast<CallInst>(&*I)) {
- Function* Callee = callInst->getCalledFunction();
- if (Callee && Callee->hasLocalLinkage())
- Callee->setLinkage(GlobalValue::ExternalLinkage);
- }
- }
- }
- }
-
- NamedFunc->setLinkage(GlobalValue::ExternalLinkage);
- NamedFunc->deleteBody();
- assert(NamedFunc->isDeclaration() && "This didn't make the function external!");
- } else {
- if (!(*GI)->isDeclaration()) {
- cast<GlobalVariable>(*GI)->setInitializer(0); //clear the initializer
- (*GI)->setLinkage(GlobalValue::ExternalLinkage);
- }
- }
- }
- return true;
- }
-
- bool isolateGV(Module &M) {
- // Mark all globals internal
- // FIXME: what should we do with private linkage?
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
- if (!I->isDeclaration()) {
- I->setLinkage(GlobalValue::InternalLinkage);
- }
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration()) {
- I->setLinkage(GlobalValue::InternalLinkage);
- }
-
- // Make sure our result is globally accessible...
- // by putting them in the used array
- {
- std::vector<Constant *> AUGs;
- const Type *SBP=
- Type::getInt8PtrTy(M.getContext());
- for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
- GE = Named.end(); GI != GE; ++GI) {
- (*GI)->setLinkage(GlobalValue::ExternalLinkage);
- AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP));
- }
- ArrayType *AT = ArrayType::get(SBP, AUGs.size());
- Constant *Init = ConstantArray::get(AT, AUGs);
- GlobalValue *gv = new GlobalVariable(M, AT, false,
- GlobalValue::AppendingLinkage,
- Init, "llvm.used");
- gv->setSection("llvm.metadata");
- }
-
- // All of the functions may be used by global variables or the named
- // globals. Loop through them and create a new, external functions that
- // can be "used", instead of ones with bodies.
- std::vector<Function*> NewFunctions;
-
- Function *Last = --M.end(); // Figure out where the last real fn is.
-
- for (Module::iterator I = M.begin(); ; ++I) {
- if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
- Function *New = Function::Create(I->getFunctionType(),
- GlobalValue::ExternalLinkage);
- New->copyAttributesFrom(I);
-
- // If it's not the named function, delete the body of the function
- I->dropAllReferences();
-
- M.getFunctionList().push_back(New);
- NewFunctions.push_back(New);
- New->takeName(I);
- }
-
- if (&*I == Last) break; // Stop after processing the last function
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the NamedSet, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+ I->setInitializer(0);
}
- // Now that we have replacements all set up, loop through the module,
- // deleting the old functions, replacing them with the newly created
- // functions.
- if (!NewFunctions.empty()) {
- unsigned FuncNum = 0;
- Module::iterator I = M.begin();
- do {
- if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
- // Make everything that uses the old function use the new dummy fn
- I->replaceAllUsesWith(NewFunctions[FuncNum++]);
-
- Function *Old = I;
- ++I; // Move the iterator to the new function
-
- // Delete the old function!
- M.getFunctionList().erase(Old);
-
- } else {
- ++I; // Skip the function we are extracting
- }
- } while (&*I != NewFunctions[0]);
+ // Visit the Functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+ I->deleteBody();
}
return true;
@@ -170,6 +75,6 @@
}
ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
- bool deleteFn, bool relinkCallees) {
- return new GVExtractorPass(GVs, deleteFn, relinkCallees);
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
}
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/FunctionAttrs.cpp Tue Oct 26 19:48:03 2010
@@ -41,7 +41,9 @@
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(&ID) {}
+ FunctionAttrs() : CallGraphSCCPass(ID) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -69,8 +71,11 @@
}
char FunctionAttrs::ID = 0;
-INITIALIZE_PASS(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false);
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
@@ -169,7 +174,7 @@
continue;
// Ignore intrinsics that only access local memory.
if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
- if (AliasAnalysis::getModRefBehavior(id) ==
+ if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
AliasAnalysis::AccessesArguments) {
// Check that all pointer arguments point to local memory.
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalDCE.cpp Tue Oct 26 19:48:03 2010
@@ -31,7 +31,9 @@
namespace {
struct GlobalDCE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- GlobalDCE() : ModulePass(&ID) {}
+ GlobalDCE() : ModulePass(ID) {
+ initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ }
// run - Do the GlobalDCE pass on the specified module, optionally updating
// the specified callgraph to reflect the changes.
@@ -52,7 +54,7 @@
char GlobalDCE::ID = 0;
INITIALIZE_PASS(GlobalDCE, "globaldce",
- "Dead Global Elimination", false, false);
+ "Dead Global Elimination", false, false)
ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/GlobalOpt.cpp Tue Oct 26 19:48:03 2010
@@ -59,7 +59,9 @@
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
static char ID; // Pass identification, replacement for typeid
- GlobalOpt() : ModulePass(&ID) {}
+ GlobalOpt() : ModulePass(ID) {
+ initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
@@ -75,7 +77,7 @@
char GlobalOpt::ID = 0;
INITIALIZE_PASS(GlobalOpt, "globalopt",
- "Global Variable Optimizer", false, false);
+ "Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -129,7 +131,7 @@
/// HasPHIUser - Set to true if this global has a user that is a PHI node.
bool HasPHIUser;
-
+
GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
AccessingFunction(0), HasMultipleAccessingFunctions(false),
HasNonInstructionUser(false), HasPHIUser(false) {}
@@ -308,7 +310,7 @@
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
Changed |= CleanupConstantGlobalUsers(CE, SubInit);
- } else if (CE->getOpcode() == Instruction::BitCast &&
+ } else if (CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) {
// Pointer cast, delete any stores and memsets to the global.
Changed |= CleanupConstantGlobalUsers(CE, 0);
@@ -324,7 +326,7 @@
// and will invalidate our notion of what Init is.
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE =
+ ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -361,7 +363,7 @@
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
return SafeToDestroyConstant(C);
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@@ -371,15 +373,15 @@
// Stores *to* the pointer are ok.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getOperand(0) != V;
-
+
// Otherwise, it must be a GEP.
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
if (GEPI == 0) return false;
-
+
if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
!cast<Constant>(GEPI->getOperand(1))->isNullValue())
return false;
-
+
for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
I != E; ++I)
if (!isSafeSROAElementUse(*I))
@@ -393,11 +395,11 @@
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
// The user of the global must be a GEP Inst or a ConstantExpr GEP.
- if (!isa<GetElementPtrInst>(U) &&
- (!isa<ConstantExpr>(U) ||
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
return false;
-
+
// Check to see if this ConstantExpr GEP is SRA'able. In particular, we
// don't like < 3 operand CE's, and we don't like non-constant integer
// indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
@@ -409,18 +411,18 @@
gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
++GEPI; // Skip over the pointer index.
-
+
// If this is a use of an array allocation, do a bit more checking for sanity.
if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
uint64_t NumElements = AT->getNumElements();
ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
+
// Check to make sure that index falls within the array. If not,
// something funny is going on, so we won't do the optimization.
//
if (Idx->getZExtValue() >= NumElements)
return false;
-
+
    // We cannot scalar repl this level of the array unless all array
// sub-indices are in-range constants. In particular, consider:
// A[0][i]. We cannot know that the user isn't doing invalid things like
@@ -441,7 +443,7 @@
"Indexed GEP type is not array, vector, or struct!");
continue;
}
-
+
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
return false;
@@ -465,7 +467,7 @@
}
return true;
}
-
+
/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable. This opens the door for other optimizations by exposing the
@@ -476,7 +478,7 @@
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
-
+
assert(GV->hasLocalLinkage() && !GV->isConstant());
Constant *Init = GV->getInitializer();
const Type *Ty = Init->getType();
@@ -488,7 +490,7 @@
unsigned StartAlignment = GV->getAlignment();
if (StartAlignment == 0)
StartAlignment = TD.getABITypeAlignment(GV->getType());
-
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
NewGlobals.reserve(STy->getNumElements());
const StructLayout &Layout = *TD.getStructLayout(STy);
@@ -503,7 +505,7 @@
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
-
+
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
@@ -522,7 +524,7 @@
if (NumElements > 16 && GV->hasNUsesOrMore(16))
return 0; // It's not worth it.
NewGlobals.reserve(NumElements);
-
+
uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
@@ -537,7 +539,7 @@
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
-
+
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
@@ -549,7 +551,7 @@
if (NewGlobals.empty())
return 0;
-
+
DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -615,7 +617,7 @@
}
/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// value will trap if the value is dynamically null. PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSet<const PHINode*, 8> &PHIs) {
@@ -757,7 +759,7 @@
// Keep track of whether we are able to remove all the uses of the global
// other than the store that defines it.
bool AllNonStoreUsesGone = true;
-
+
// Replace all uses of loads with uses of uses of the stored value.
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
User *GlobalUser = *GUI++;
@@ -830,7 +832,7 @@
ConstantInt *NElements,
TargetData* TD) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
-
+
const Type *GlobalType;
if (NElements->getZExtValue() == 1)
GlobalType = AllocTy;
@@ -840,14 +842,14 @@
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
GlobalType, false,
GlobalValue::InternalLinkage,
UndefValue::get(GlobalType),
GV->getName()+".body",
GV,
GV->isThreadLocal());
-
+
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
// other users to use the global as well.
@@ -867,10 +869,10 @@
User->replaceUsesOfWith(CI, TheBC);
}
}
-
+
Constant *RepValue = NewGV;
if (NewGV->getType() != GV->getType()->getElementType())
- RepValue = ConstantExpr::getBitCast(RepValue,
+ RepValue = ConstantExpr::getBitCast(RepValue,
GV->getType()->getElementType());
// If there is a comparison against null, we will insert a global bool to
@@ -890,7 +892,7 @@
SI->eraseFromParent();
continue;
}
-
+
LoadInst *LI = cast<LoadInst>(GV->use_back());
while (!LI->use_empty()) {
Use &LoadUse = LI->use_begin().getUse();
@@ -898,7 +900,7 @@
LoadUse = RepValue;
continue;
}
-
+
ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
// Replace the cmp X, 0 with a use of the bool value.
Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
@@ -963,20 +965,20 @@
if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
continue; // Fine, ignore.
}
-
+
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
return false; // Storing the pointer itself... bad.
continue; // Otherwise, storing through it, or storing into GV... fine.
}
-
+
// Must index into the array and into the struct.
if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
return false;
continue;
}
-
+
if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
// PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
// cycles.
@@ -985,13 +987,13 @@
return false;
continue;
}
-
+
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
return false;
continue;
}
-
+
return false;
}
return true;
@@ -1000,9 +1002,9 @@
/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere. Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer. Further, delete the store into
-/// GV. This assumes that these value pass the
+/// GV. This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
-static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
while (!Alloc->use_empty()) {
Instruction *U = cast<Instruction>(*Alloc->use_begin());
@@ -1035,7 +1037,7 @@
continue;
}
}
-
+
// Insert a load from the global, and use it instead of the malloc.
Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
U->replaceUsesOfWith(Alloc, NL);
@@ -1053,24 +1055,24 @@
for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
++UI) {
const Instruction *User = cast<Instruction>(*UI);
-
+
// Comparison against null is ok.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
continue;
}
-
+
// getelementptr is also ok, but only a simple form.
if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
// Must index into the array and into the struct.
if (GEPI->getNumOperands() < 3)
return false;
-
+
// Otherwise the GEP is ok.
continue;
}
-
+
if (const PHINode *PN = dyn_cast<PHINode>(User)) {
if (!LoadUsingPHIsPerLoad.insert(PN))
// This means some phi nodes are dependent on each other.
@@ -1079,19 +1081,19 @@
if (!LoadUsingPHIs.insert(PN))
// If we have already analyzed this PHI, then it is safe.
continue;
-
+
// Make sure all uses of the PHI are simple enough to transform.
if (!LoadUsesSimpleEnoughForHeapSRA(PN,
LoadUsingPHIs, LoadUsingPHIsPerLoad))
return false;
-
+
continue;
}
-
+
// Otherwise we don't know what this is, not ok.
return false;
}
-
+
return true;
}
@@ -1110,10 +1112,10 @@
return false;
LoadUsingPHIsPerLoad.clear();
}
-
+
// If we reach here, we know that all uses of the loads and transitive uses
// (through PHI nodes) are simple enough to transform. However, we don't know
- // that all inputs the to the PHI nodes are in the same equivalence sets.
+  // that all inputs to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
@@ -1121,29 +1123,29 @@
const PHINode *PN = *I;
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
-
+
// PHI of the stored value itself is ok.
if (InVal == StoredVal) continue;
-
+
if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
// One of the PHIs in our set is (optimistically) ok.
if (LoadUsingPHIs.count(InPN))
continue;
return false;
}
-
+
// Load from GV is ok.
if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
if (LI->getOperand(0) == GV)
continue;
-
+
// UNDEF? NULL?
-
+
// Anything else is rejected.
return false;
}
}
-
+
return true;
}
@@ -1151,15 +1153,15 @@
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
-
+
if (FieldNo >= FieldVals.size())
FieldVals.resize(FieldNo+1);
-
+
// If we already have this value, just reuse the previously scalarized
// version.
if (Value *FieldVal = FieldVals[FieldNo])
return FieldVal;
-
+
// Depending on what instruction this is, we have several cases.
Value *Result;
if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
@@ -1172,9 +1174,9 @@
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- const StructType *ST =
+ const StructType *ST =
cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
-
+
Result =
PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
PN->getName()+".f"+Twine(FieldNo), PN);
@@ -1183,13 +1185,13 @@
llvm_unreachable("Unknown usable value");
Result = 0;
}
-
+
return FieldVals[FieldNo] = Result;
}
/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
// If this is a comparison against null, handle it.
@@ -1199,30 +1201,30 @@
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
InsertedScalarizedValues, PHIsToRewrite);
-
+
Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
- Constant::getNullValue(NPtr->getType()),
+ Constant::getNullValue(NPtr->getType()),
SCI->getName());
SCI->replaceAllUsesWith(New);
SCI->eraseFromParent();
return;
}
-
+
// Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
&& "Unexpected GEPI!");
-
+
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
InsertedScalarizedValues, PHIsToRewrite);
-
+
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
-
+
Value *NGEPI = GetElementPtrInst::Create(NewPtr,
GEPIdx.begin(), GEPIdx.end(),
GEPI->getName(), GEPI);
@@ -1243,7 +1245,7 @@
tie(InsertPos, Inserted) =
InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
if (!Inserted) return;
-
+
// If this is the first time we've seen this PHI, recursively process all
// users.
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
@@ -1256,7 +1258,7 @@
/// is a value loaded from the global. Eliminate all uses of Ptr, making them
/// use FieldGlobals instead. All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
-static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
@@ -1264,7 +1266,7 @@
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
-
+
if (Load->use_empty()) {
Load->eraseFromParent();
InsertedScalarizedValues.erase(Load);
@@ -1289,11 +1291,11 @@
// new mallocs at the same place as CI, and N globals.
std::vector<Value*> FieldGlobals;
std::vector<Value*> FieldMallocs;
-
+
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
const Type *FieldTy = STy->getElementType(FieldNo);
const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
-
+
GlobalVariable *NGV =
new GlobalVariable(*GV->getParent(),
PFieldTy, false, GlobalValue::InternalLinkage,
@@ -1301,7 +1303,7 @@
GV->getName() + ".f" + Twine(FieldNo), GV,
GV->isThreadLocal());
FieldGlobals.push_back(NGV);
-
+
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (const StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
@@ -1313,7 +1315,7 @@
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
}
-
+
// The tricky aspect of this transformation is handling the case when malloc
// fails. In the original code, malloc failing would set the result pointer
// of malloc to null. In this case, some mallocs could succeed and others
@@ -1340,23 +1342,23 @@
// Split the basic block at the old malloc.
BasicBlock *OrigBB = CI->getParent();
BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
-
+
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
"malloc_ret_null",
OrigBB->getParent());
-
+
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
// branch on RunningOr.
OrigBB->getTerminator()->eraseFromParent();
BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
-
+
// Within the NullPtrBlock, we need to emit a comparison and branch for each
// pointer, because some may be null while others are not.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
- Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()),
"tmp");
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1371,10 +1373,10 @@
new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
FreeBlock);
BranchInst::Create(NextBlock, FreeBlock);
-
+
NullPtrBlock = NextBlock;
}
-
+
BranchInst::Create(ContBB, NullPtrBlock);
// CI is no longer needed, remove it.
@@ -1385,25 +1387,25 @@
/// inserted for a given load.
DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
-
+
std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
-
+
// Okay, the malloc site is completely handled. All of the uses of GV are now
// loads, and all uses of those loads are simple. Rewrite them to use loads
// of the per-field globals instead.
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
continue;
}
-
+
// Must be a store of null.
StoreInst *SI = cast<StoreInst>(User);
assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
"Unexpected heap-sra user!");
-
+
// Insert a store of null into each global.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
@@ -1430,7 +1432,7 @@
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
-
+
// Drop all inter-phi links and any loads that made it this far.
for (DenseMap<Value*, std::vector<Value*> >::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1440,7 +1442,7 @@
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->dropAllReferences();
}
-
+
// Delete all the phis and loads now that inter-references are dead.
for (DenseMap<Value*, std::vector<Value*> >::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1450,7 +1452,7 @@
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->eraseFromParent();
}
-
+
// The old global is now dead, remove it.
GV->eraseFromParent();
@@ -1468,7 +1470,7 @@
TargetData *TD) {
if (!TD)
return false;
-
+
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1508,7 +1510,7 @@
GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
return true;
}
-
+
// If the allocation is an array of structures, consider transforming this
// into multiple malloc'd arrays, one for each field. This is basically
// SRoA for malloc'd memory.
@@ -1544,13 +1546,13 @@
CI = dyn_cast<BitCastInst>(Malloc) ?
extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
-
+
GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
return true;
}
-
+
return false;
-}
+}
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
// that only one value (besides its initializer) is ever stored to the global.
@@ -1568,7 +1570,7 @@
GV->getInitializer()->isNullValue()) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
- SOVC =
+ SOVC =
ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
@@ -1576,7 +1578,7 @@
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
const Type* MallocType = getMallocAllocatedType(CI);
- if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
GVI, TD))
return true;
}
@@ -1591,7 +1593,7 @@
/// whenever it is used. This exposes the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
const Type *GVElType = GV->getType()->getElementType();
-
+
// If GVElType is already i1, it is already shrunk. If the type of the GV is
// an FP value, pointer or vector, don't do this optimization because a select
// between them is very expensive and unlikely to lead to later
@@ -1611,11 +1613,11 @@
}
DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
-
+
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
false,
- GlobalValue::InternalLinkage,
+ GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->isThreadLocal());
@@ -1716,11 +1718,11 @@
<< GS.AccessingFunction->getName() << "\n");
DEBUG(dbgs() << " HasMultipleAccessingFunctions = "
<< GS.HasMultipleAccessingFunctions << "\n");
- DEBUG(dbgs() << " HasNonInstructionUser = "
+ DEBUG(dbgs() << " HasNonInstructionUser = "
<< GS.HasNonInstructionUser<<"\n");
DEBUG(dbgs() << "\n");
#endif
-
+
// If this is a first class global and has only one accessing function
  // and this function is main (which we know is not recursive), we can make
  // this global a local variable: we replace the global with a local alloca
@@ -1750,7 +1752,7 @@
++NumLocalized;
return true;
}
-
+
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.isLoaded) {
@@ -1943,9 +1945,10 @@
if (!FTy || !FTy->getReturnType()->isVoidTy() ||
FTy->isVarArg() || FTy->getNumParams() != 0)
return 0;
-
- // Verify that the initializer is simple enough for us to handle.
- if (!I->hasDefinitiveInitializer()) return 0;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!I->hasUniqueInitializer()) return 0;
ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
if (!CA) return 0;
for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -1956,7 +1959,7 @@
// Must have a function or null ptr.
if (!isa<Function>(CS->getOperand(1)))
return 0;
-
+
// Init priority must be standard.
ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
if (!CI || CI->getZExtValue() != 65535)
@@ -1964,7 +1967,7 @@
} else {
return 0;
}
-
+
return I;
}
return 0;
@@ -1985,13 +1988,13 @@
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
const std::vector<Function*> &Ctors) {
// If we made a change, reassemble the initializer list.
std::vector<Constant*> CSVals;
CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
CSVals.push_back(0);
-
+
// Create the new init list.
std::vector<Constant*> CAList;
for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
@@ -2007,26 +2010,26 @@
}
CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
}
-
+
// Create the array initializer.
const Type *StructTy =
cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
- Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
CAList.size()), CAList);
-
+
// If we didn't change the number of elements, don't create a new GV.
if (CA->getType() == GCL->getInitializer()->getType()) {
GCL->setInitializer(CA);
return GCL;
}
-
+
// Create the new global and insert it next to the existing list.
GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
GCL->getLinkage(), CA, "",
GCL->isThreadLocal());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
NGV->takeName(GCL);
-
+
// Nuke the old list, replacing any uses with the new one.
if (!GCL->use_empty()) {
Constant *V = NGV;
@@ -2035,7 +2038,7 @@
GCL->replaceAllUsesWith(V);
}
GCL->eraseFromParent();
-
+
if (Ctors.size())
return NGV;
else
@@ -2062,9 +2065,9 @@
return false;
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
// external globals.
- return GV->hasDefinitiveInitializer();
+ return GV->hasUniqueInitializer();
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
// Handle a constantexpr gep.
@@ -2072,13 +2075,13 @@
isa<GlobalVariable>(CE->getOperand(0)) &&
cast<GEPOperator>(CE)->isInBounds()) {
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
// external globals.
- if (!GV->hasDefinitiveInitializer())
+ if (!GV->hasUniqueInitializer())
return false;
// The first index must be zero.
- ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
+ ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
if (!CI || !CI->isZero()) return false;
// The remaining indices must be compile-time known integers within the
@@ -2101,7 +2104,7 @@
assert(Val->getType() == Init->getType() && "Type mismatch!");
return Val;
}
-
+
std::vector<Constant*> Elts;
if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
@@ -2119,13 +2122,13 @@
llvm_unreachable("This code is out of sync with "
" ConstantFoldLoadThroughGEPConstantExpr");
}
-
+
// Replace the element that we are supposed to.
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
unsigned Idx = CU->getZExtValue();
assert(Idx < STy->getNumElements() && "Struct index out of range!");
Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
-
+
// Return the modified struct.
return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
STy->isPacked());
@@ -2138,8 +2141,8 @@
NumElts = ATy->getNumElements();
else
NumElts = cast<VectorType>(InitTy)->getNumElements();
-
-
+
+
// Break up the array into elements.
if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -2154,16 +2157,16 @@
" ConstantFoldLoadThroughGEPConstantExpr");
Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
}
-
+
assert(CI->getZExtValue() < NumElts);
Elts[CI->getZExtValue()] =
EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
-
+
if (Init->getType()->isArrayTy())
return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
else
return ConstantVector::get(&Elts[0], Elts.size());
- }
+ }
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
@@ -2189,14 +2192,14 @@
// is the most up-to-date.
DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
if (I != Memory.end()) return I->second;
-
+
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
if (GV->hasDefinitiveInitializer())
return GV->getInitializer();
return 0;
}
-
+
// Handle a constantexpr getelementptr.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -2221,12 +2224,12 @@
// bail out. TODO: we might want to accept limited recursion.
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
return false;
-
+
CallStack.push_back(F);
-
+
/// Values - As we compute SSA register values, we store their contents here.
DenseMap<Value*, Constant*> Values;
-
+
// Initialize arguments to the incoming values specified.
unsigned ArgNo = 0;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
@@ -2237,14 +2240,14 @@
/// we can only evaluate any one basic block at most once. This set keeps
/// track of what we have executed so we can detect recursive cases etc.
SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
+
// CurInst - The current instruction we're evaluating.
BasicBlock::iterator CurInst = F->begin()->begin();
-
+
// This is the main evaluation loop.
while (1) {
Constant *InstResult = 0;
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (SI->isVolatile()) return false; // no volatile accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
@@ -2290,7 +2293,7 @@
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
- InstResult = AllocaTmps.back();
+ InstResult = AllocaTmps.back();
} else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
// Debug info can safely be ignored here.
@@ -2324,7 +2327,7 @@
} else {
if (Callee->getFunctionType()->isVarArg())
return false;
-
+
Constant *RetVal;
// Execute the call, if successful, use the return value.
if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
@@ -2342,7 +2345,7 @@
dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
if (!Cond) return false; // Cannot determine.
- NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ NewBB = BI->getSuccessor(!Cond->getZExtValue());
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
ConstantInt *Val =
@@ -2358,20 +2361,20 @@
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
if (RI->getNumOperands())
RetVal = getVal(Values, RI->getOperand(0));
-
+
CallStack.pop_back(); // return from fn.
return true; // We succeeded at evaluating this ctor!
} else {
// invoke, unwind, unreachable.
return false; // Cannot handle this terminator.
}
-
+
// Okay, we succeeded in evaluating this control flow. See if we have
// executed the new block before. If so, we have a looping function,
// which we cannot evaluate in reasonable time.
if (!ExecutedBlocks.insert(NewBB))
return false; // looped!
-
+
// Okay, we have never been in this block before. Check to see if there
// are any PHI nodes. If so, evaluate them with information about where
// we came from.
@@ -2387,10 +2390,10 @@
// Did not know how to evaluate this!
return false;
}
-
+
if (!CurInst->use_empty())
Values[CurInst] = InstResult;
-
+
// Advance program counter.
++CurInst;
}
@@ -2408,7 +2411,7 @@
/// to represent its body. This vector is needed so we can delete the
/// temporary globals when we are done.
std::vector<GlobalVariable*> AllocaTmps;
-
+
/// CallStack - This is used to detect recursion. In pathological situations
/// we could hit exponential behavior, but at least there is nothing
/// unbounded.
@@ -2428,13 +2431,13 @@
E = MutatedMemory.end(); I != E; ++I)
CommitValueTo(I->second, I->first);
}
-
+
// At this point, we are done interpreting. If we created any 'alloca'
// temporaries, release them now.
while (!AllocaTmps.empty()) {
GlobalVariable *Tmp = AllocaTmps.back();
AllocaTmps.pop_back();
-
+
// If there are still users of the alloca, the program is doing something
// silly, e.g. storing the address of the alloca somewhere and using it
// later. Since this is undefined, we'll just make it be null.
@@ -2442,7 +2445,7 @@
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
delete Tmp;
}
-
+
return EvalSuccess;
}
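
The evaluator above only commits MutatedMemory to the real globals after a constructor has been interpreted all the way to its return; any unsupported instruction aborts with no side effects. A self-contained toy model of that commit protocol (plain standard C++, not the LLVM evaluator itself):

  #include <map>
  #include <string>

  // Simulate interpreting a ctor body: writes land in a scratch map.
  static bool evaluate(std::map<std::string, int> &Scratch) {
    Scratch["x"] = 42;  // stands in for 'store i32 42, i32* @x'
    return true;        // reached 'ret' => evaluation succeeded
  }

  // Commit the simulated writes only on full success, mirroring the
  // CommitValueTo loop above.
  static void foldCtor(std::map<std::string, int> &Globals) {
    std::map<std::string, int> MutatedMemory;
    if (!evaluate(MutatedMemory))
      return;                         // abort: no partial state escapes
    for (std::map<std::string, int>::iterator I = MutatedMemory.begin(),
         E = MutatedMemory.end(); I != E; ++I)
      Globals[I->first] = I->second;  // overwrite the "initializer"
  }
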
@@ -2454,7 +2457,7 @@
std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
bool MadeChange = false;
if (Ctors.empty()) return false;
-
+
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2467,10 +2470,10 @@
}
break;
}
-
+
// We cannot simplify external ctor functions.
if (F->empty()) continue;
-
+
// If we can evaluate the ctor at compile time, do.
if (EvaluateStaticConstructor(F)) {
Ctors.erase(Ctors.begin()+i);
@@ -2480,9 +2483,9 @@
continue;
}
}
-
+
if (!MadeChange) return false;
-
+
GCL = InstallGlobalCtors(GCL, Ctors);
return true;
}
@@ -2546,21 +2549,21 @@
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
-
+
  // Try to find the llvm.global_ctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
-
+
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M);
-
+
// Optimize global_ctors list.
if (GlobalCtors)
LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
-
+
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M);
@@ -2568,9 +2571,9 @@
LocalChange |= OptimizeGlobalAliases(M);
Changed |= LocalChange;
}
-
+
// TODO: Move all global ctors functions to the end of the module for code
// layout.
-
+
return Changed;
}
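
Two of the substantive changes in this file swap hasDefinitiveInitializer() for hasUniqueInitializer() at sites that rewrite an initializer in place. The distinction matters for weak-for-linker globals: a linkonce_odr copy has a definitive initializer (every surviving copy is identical, so reading through it is fine) but not a unique one (the linker may discard this particular copy, so writing to it is not). A hedged sketch of the split, assuming the 2010-era llvm/GlobalVariable.h:

  #include "llvm/GlobalVariable.h"

  // Reading through the initializer only needs all copies to agree.
  static bool safeToReadInitializer(const llvm::GlobalVariable &GV) {
    return GV.hasDefinitiveInitializer();
  }

  // Rewriting the initializer additionally needs this copy to be the one
  // that reaches the final image.
  static bool safeToRewriteInitializer(const llvm::GlobalVariable &GV) {
    return GV.hasUniqueInitializer();
  }
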
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/IPConstantPropagation.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
///
struct IPCP : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- IPCP() : ModulePass(&ID) {}
+ IPCP() : ModulePass(ID) {
+ initializeIPCPPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
private:
@@ -46,7 +48,7 @@
char IPCP::ID = 0;
INITIALIZE_PASS(IPCP, "ipconstprop",
- "Interprocedural constant propagation", false, false);
+ "Interprocedural constant propagation", false, false)
ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/IPO.cpp Tue Oct 26 19:48:03 2010
@@ -7,17 +7,52 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMIPO.a, which implements
-// several transformations over the LLVM intermediate representation.
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
+// intermediate representation.
//
//===----------------------------------------------------------------------===//
#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
+void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeArgPromotionPass(Registry);
+ initializeConstantMergePass(Registry);
+ initializeDAEPass(Registry);
+ initializeDAHPass(Registry);
+ initializeDTEPass(Registry);
+ initializeFunctionAttrsPass(Registry);
+ initializeGlobalDCEPass(Registry);
+ initializeGlobalOptPass(Registry);
+ initializeIPCPPass(Registry);
+ initializeAlwaysInlinerPass(Registry);
+ initializeSimpleInlinerPass(Registry);
+ initializeInternalizePassPass(Registry);
+ initializeLoopExtractorPass(Registry);
+ initializeBlockExtractorPassPass(Registry);
+ initializeSingleLoopExtractorPass(Registry);
+ initializeLowerSetJmpPass(Registry);
+ initializeMergeFunctionsPass(Registry);
+ initializePartialInlinerPass(Registry);
+ initializePartSpecPass(Registry);
+ initializePruneEHPass(Registry);
+ initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripSymbolsPass(Registry);
+ initializeStripDebugDeclarePass(Registry);
+ initializeStripDeadDebugInfoPass(Registry);
+ initializeStripNonDebugSymbolsPass(Registry);
+ initializeSRETPromotionPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+ initializeIPO(*unwrap(R));
+}
+
void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createArgumentPromotionPass());
}
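
The new initializeIPO() gives C API clients a one-call way to register every IPO pass before building pass managers. A hedged usage sketch: it assumes LLVMGetGlobalPassRegistry() from llvm-c/Core.h, added alongside these pass-registry bindings, and hand-declares the LLVMInitializeIPO prototype since this branch may not carry a dedicated header for it:

  #include "llvm-c/Core.h"

  // Prototype assumed from the definition above.
  extern "C" void LLVMInitializeIPO(LLVMPassRegistryRef R);

  int main() {
    // Register the IPO passes with the global registry so later
    // pass-manager construction and by-name lookups can find them.
    LLVMInitializeIPO(LLVMGetGlobalPassRegistry());
    return 0;
  }
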
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/InlineAlways.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(&ID, -2000000000) {}
+ AlwaysInliner() : Inliner(ID, -2000000000) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +63,11 @@
}
char AlwaysInliner::ID = 0;
-INITIALIZE_PASS(AlwaysInliner, "always-inline",
- "Inliner for always_inline functions", false, false);
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/InlineSimple.cpp Tue Oct 26 19:48:03 2010
@@ -33,8 +33,12 @@
SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
- SimpleInliner() : Inliner(&ID) {}
- SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {}
+ SimpleInliner() : Inliner(ID) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +60,11 @@
}
char SimpleInliner::ID = 0;
-INITIALIZE_PASS(SimpleInliner, "inline",
- "Function Integration/Inlining", false, false);
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/Inliner.cpp Tue Oct 26 19:48:03 2010
@@ -48,10 +48,10 @@
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
-Inliner::Inliner(void *ID)
+Inliner::Inliner(char &ID)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
-Inliner::Inliner(void *ID, int Threshold)
+Inliner::Inliner(char &ID, int Threshold)
: CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
/// getAnalysisUsage - For this class, we declare that we require and preserve
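
Inliner's constructors changing from void* to char& propagates the by-reference ID convention into subclasses such as AlwaysInliner and SimpleInliner above. A minimal subclass sketch under the new signature; the class name and threshold are hypothetical, registration follows the same macro pattern shown earlier, and the hooks assume the 2010-era llvm/Transforms/IPO/InlinerPass.h and llvm/Analysis/InlineCost.h:

  #include "llvm/Transforms/IPO/InlinerPass.h"
  #include "llvm/Analysis/InlineCost.h"
  #include "llvm/ADT/SmallPtrSet.h"
  using namespace llvm;

  namespace {
    class CheapInliner : public Inliner {
      SmallPtrSet<const Function*, 16> NeverInline;
      InlineCostAnalyzer CA;
    public:
      static char ID;
      // Inline only very cheap call sites: pass ID by reference plus a
      // small threshold, exactly as SimpleInliner(int) does above.
      CheapInliner() : Inliner(ID, 50) {}
      InlineCost getInlineCost(CallSite CS) {
        return CA.getInlineCost(CS, NeverInline);
      }
      float getInlineFudgeFactor(CallSite CS) {
        return CA.getInlineFudgeFactor(CS);
      }
      void resetCachedCostInfo(Function *Caller) {
        CA.resetCachedCostInfo(Caller);
      }
      void growCachedCostInfo(Function *Caller, Function *Callee) {
        CA.growCachedCostInfo(Caller, Callee);
      }
    };
  }
  char CheapInliner::ID = 0;
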
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/Internalize.cpp Tue Oct 26 19:48:03 2010
@@ -64,10 +64,11 @@
char InternalizePass::ID = 0;
INITIALIZE_PASS(InternalizePass, "internalize",
- "Internalize Global Symbols", false, false);
+ "Internalize Global Symbols", false, false)
InternalizePass::InternalizePass(bool AllButMain)
- : ModulePass(&ID), AllButMain(AllButMain){
+ : ModulePass(ID), AllButMain(AllButMain){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
if (!APIList.empty()) // If a list is specified, use it as well.
@@ -75,7 +76,8 @@
}
InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
- : ModulePass(&ID), AllButMain(false){
+ : ModulePass(ID), AllButMain(false){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
for(std::vector<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/LoopExtractor.cpp Tue Oct 26 19:48:03 2010
@@ -37,7 +37,9 @@
unsigned NumLoops;
explicit LoopExtractor(unsigned numLoops = ~0)
- : LoopPass(&ID), NumLoops(numLoops) {}
+ : LoopPass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -50,8 +52,13 @@
}
char LoopExtractor::ID = 0;
-INITIALIZE_PASS(LoopExtractor, "loop-extract",
- "Extract loops into new functions", false, false);
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
namespace {
/// SingleLoopExtractor - For bugpoint.
@@ -63,7 +70,7 @@
char SingleLoopExtractor::ID = 0;
INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
- "Extract at most one loop into a new function", false, false);
+ "Extract at most one loop into a new function", false, false)
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
@@ -147,7 +154,7 @@
std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
public:
static char ID; // Pass identification, replacement for typeid
- BlockExtractorPass() : ModulePass(&ID) {
+ BlockExtractorPass() : ModulePass(ID) {
if (!BlockFile.empty())
LoadFile(BlockFile.c_str());
}
@@ -159,7 +166,7 @@
char BlockExtractorPass::ID = 0;
INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
"Extract Basic Blocks From Module (for bugpoint use)",
- false, false);
+ false, false)
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
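
Where FunctionAttrs and the inliners declare an analysis-group dependency, LoopExtractor shows the concrete-pass form: each INITIALIZE_PASS_DEPENDENCY names a pass whose own initializer is guaranteed to run before this one is registered. A sketch of just the macro block for a hypothetical loop pass; the class itself would follow the same shape as the MyChecker sketch earlier, with LoopPass(ID) and an initializeMyLoopPassPass call in its constructor:

  INITIALIZE_PASS_BEGIN(MyLoopPass, "my-loop-pass", "Example loop pass",
                        false, false)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)   // concrete pass dependencies
  INITIALIZE_PASS_DEPENDENCY(DominatorTree)
  INITIALIZE_PASS_END(MyLoopPass, "my-loop-pass", "Example loop pass",
                      false, false)
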
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/LowerSetJmp.cpp Tue Oct 26 19:48:03 2010
@@ -109,7 +109,9 @@
bool IsTransformableFunction(StringRef Name);
public:
static char ID; // Pass identification, replacement for typeid
- LowerSetJmp() : ModulePass(&ID) {}
+ LowerSetJmp() : ModulePass(ID) {
+ initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
+ }
void visitCallInst(CallInst& CI);
void visitInvokeInst(InvokeInst& II);
@@ -122,7 +124,7 @@
} // end anonymous namespace
char LowerSetJmp::ID = 0;
-INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
// run - Run the transformation on the program. We grab the function
// prototypes for longjmp and setjmp. If they are used in the program,
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/MergeFunctions.cpp Tue Oct 26 19:48:03 2010
@@ -29,44 +29,27 @@
//
// Many functions have their address taken by the virtual function table for
// the object they belong to. However, as long as it's only used for a lookup
-// and call, this is irrelevant, and we'd like to fold such implementations.
+// and call, this is irrelevant, and we'd like to fold such functions.
//
-// * use SCC to cut down on pair-wise comparisons and solve larger cycles.
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
//
-// The current implementation loops over a pair-wise comparison of all
-// functions in the program where the two functions in the pair are treated as
-// assumed to be equal until proven otherwise. We could both use fewer
-// comparisons and optimize more complex cases if we used strongly connected
-// components of the call graph.
-//
-// * be smarter about bitcast.
+// * be smarter about bitcasts.
//
// In order to fold functions, we will sometimes add either bitcast instructions
// or bitcast constant expressions. Unfortunately, this can confound further
// analysis since the two functions differ where one has a bitcast and the
-// other doesn't. We should learn to peer through bitcasts without imposing bad
-// performance properties.
-//
-// * don't emit aliases for Mach-O.
-//
-// Mach-O doesn't support aliases which means that we must avoid introducing
-// them in the bitcode on architectures which don't support them, such as
-// Mac OSX. There's a few approaches to this problem;
-// a) teach codegen to lower global aliases to thunks on platforms which don't
-// support them.
-// b) always emit thunks, and create a separate thunk-to-alias pass which
-// runs on ELF systems. This has the added benefit of transforming other
-// thunks such as those produced by a C++ frontend into aliases when legal
-// to do so.
+// other doesn't. We should learn to look through bitcasts.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
@@ -76,52 +59,21 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
-#include <map>
#include <vector>
using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
-namespace {
- class MergeFunctions : public ModulePass {
- public:
- static char ID; // Pass identification, replacement for typeid
- MergeFunctions() : ModulePass(&ID) {}
-
- bool runOnModule(Module &M);
-
- private:
- bool isEquivalentGEP(const GetElementPtrInst *GEP1,
- const GetElementPtrInst *GEP2);
-
- bool equals(const BasicBlock *BB1, const BasicBlock *BB2);
- bool equals(const Function *F, const Function *G);
-
- bool compare(const Value *V1, const Value *V2);
-
- const Function *LHS, *RHS;
- typedef DenseMap<const Value *, unsigned long> IDMap;
- IDMap Map;
- DenseMap<const Function *, IDMap> Domains;
- DenseMap<const Function *, unsigned long> DomainCount;
- TargetData *TD;
- };
-}
-
-char MergeFunctions::ID = 0;
-INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
-
-ModulePass *llvm::createMergeFunctionsPass() {
- return new MergeFunctions();
-}
-
-// ===----------------------------------------------------------------------===
-// Comparison of functions
-// ===----------------------------------------------------------------------===
-
-static unsigned long hash(const Function *F) {
+/// ProfileFunction - Creates a hash-code for the function which is the same
+/// for any two functions that will compare equal, without looking at the
+/// instructions inside the function.
+static unsigned ProfileFunction(const Function *F) {
const FunctionType *FTy = F->getFunctionType();
FoldingSetNodeID ID;
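
ProfileFunction (the renamed file-static hash()) deliberately hashes only facts that any two mergeable functions must share, such as signature shape and calling convention, never instruction bodies, so buckets are cheap to compute and collisions are settled by the full comparator. A toy profile in the same spirit, assuming llvm/ADT/FoldingSet.h; the particular fields are illustrative only:

  #include "llvm/ADT/FoldingSet.h"

  static unsigned profileSignature(bool IsVarArg, unsigned NumParams,
                                   unsigned CallingConv) {
    llvm::FoldingSetNodeID ID;
    ID.AddBoolean(IsVarArg);   // shape facts only: cheap and stable
    ID.AddInteger(NumParams);
    ID.AddInteger(CallingConv);
    return ID.ComputeHash();   // bucket key; equality is checked later
  }
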
@@ -135,9 +87,159 @@
return ID.ComputeHash();
}
-/// isEquivalentType - any two pointers are equivalent. Otherwise, standard
-/// type equivalence rules apply.
-static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
+namespace {
+
+class ComparableFunction {
+public:
+ static const ComparableFunction EmptyKey;
+ static const ComparableFunction TombstoneKey;
+
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
+
+ Function *getFunc() const { return Func; }
+ unsigned getHash() const { return Hash; }
+ TargetData *getTD() const { return TD; }
+
+ // Drops AssertingVH reference to the function. Outside of debug mode, this
+ // does nothing.
+ void release() {
+ assert(Func &&
+ "Attempted to release function twice, or release empty/tombstone!");
+ Func = NULL;
+ }
+
+private:
+ explicit ComparableFunction(unsigned Hash)
+ : Func(NULL), Hash(Hash), TD(NULL) {}
+
+ AssertingVH<Function> Func;
+ unsigned Hash;
+ TargetData *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+ ComparableFunction(1);
+
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<ComparableFunction> {
+ static ComparableFunction getEmptyKey() {
+ return ComparableFunction::EmptyKey;
+ }
+ static ComparableFunction getTombstoneKey() {
+ return ComparableFunction::TombstoneKey;
+ }
+ static unsigned getHashValue(const ComparableFunction &CF) {
+ return CF.getHash();
+ }
+ static bool isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS);
+ };
+}
+
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one with a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+ static char ID;
+ MergeFunctions() : ModulePass(ID) {
+ initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+private:
+ typedef DenseSet<ComparableFunction> FnSetType;
+
+
+ /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// equal to one that's already present.
+ bool Insert(FnSetType &FnSet, ComparableFunction &NewF);
+
+ /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
+ /// may be deleted, or may be converted into a thunk. In either case, it
+ /// should never be visited again.
+ void MergeTwoFunctions(Function *F, Function *G) const;
+
+ /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
+ /// replace direct uses of G with bitcast(F). Deletes G.
+ void WriteThunk(Function *F, Function *G) const;
+
+ TargetData *TD;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
+
+namespace {
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const TargetData *TD, const Function *F1,
+ const Function *F2)
+ : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+
+ /// Compare - test whether the two functions have equivalent behaviour.
+ bool Compare();
+
+private:
+ /// Compare - test whether two basic blocks have equivalent behaviour.
+ bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+ /// Enumerate - Assign or look up previously assigned numbers for the two
+ /// values, and return whether the numbers are equal. Numbers are assigned in
+ /// the order visited.
+ bool Enumerate(const Value *V1, const Value *V2);
+
+ /// isEquivalentOperation - Compare two Instructions for equivalence, similar
+ /// to Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ bool isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const;
+
+ /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+ }
+
+ /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+ bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
+
+ // The two functions undergoing comparison.
+ const Function *F1, *F2;
+
+ const TargetData *TD;
+
+ typedef DenseMap<const Value *, unsigned long> IDMap;
+ IDMap Map1, Map2;
+ unsigned long IDMap1Count, IDMap2Count;
+};
+}
+
+/// isEquivalentType - any two pointers in the same address space are
+/// equivalent. Otherwise, standard type equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+ const Type *Ty2) const {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID())
@@ -184,21 +286,6 @@
return true;
}
- case Type::UnionTyID: {
- const UnionType *UTy1 = cast<UnionType>(Ty1);
- const UnionType *UTy2 = cast<UnionType>(Ty2);
-
- // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc.
- if (UTy1->getNumElements() != UTy2->getNumElements())
- return false;
-
- for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) {
- if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i)))
- return false;
- }
- return true;
- }
-
case Type::FunctionTyID: {
const FunctionType *FTy1 = cast<FunctionType>(Ty1);
const FunctionType *FTy2 = cast<FunctionType>(Ty2);
@@ -222,6 +309,7 @@
return ATy1->getNumElements() == ATy2->getNumElements() &&
isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
}
+
case Type::VectorTyID: {
const VectorType *VTy1 = cast<VectorType>(Ty1);
const VectorType *VTy2 = cast<VectorType>(Ty2);
@@ -234,8 +322,8 @@
/// isEquivalentOperation - determine whether the two operations are the same
/// except that pointer-to-A and pointer-to-B are equivalent. This should be
/// kept in sync with Instruction::isSameOperationAs.
-static bool
-isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const {
if (I1->getOpcode() != I2->getOpcode() ||
I1->getNumOperands() != I2->getNumOperands() ||
!isEquivalentType(I1->getType(), I2->getType()) ||
@@ -287,18 +375,15 @@
return true;
}
-bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
- const GetElementPtrInst *GEP2) {
+/// isEquivalentGEP - determine whether two GEP operations perform the same
+/// underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+ const GEPOperator *GEP2) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
- SmallVector<Value *, 8> Indices1, Indices2;
- for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(),
- E = GEP1->idx_end(); I != E; ++I) {
- Indices1.push_back(*I);
- }
- for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(),
- E = GEP2->idx_end(); I != E; ++I) {
- Indices2.push_back(*I);
- }
+ SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+ SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
Indices1.data(), Indices1.size());
uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
@@ -306,7 +391,6 @@
return Offset1 == Offset2;
}
- // Equivalent types aren't enough.
if (GEP1->getPointerOperand()->getType() !=
GEP2->getPointerOperand()->getType())
return false;
@@ -315,19 +399,26 @@
return false;
for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
- if (!compare(GEP1->getOperand(i), GEP2->getOperand(i)))
+ if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
return false;
}
return true;
}
-bool MergeFunctions::compare(const Value *V1, const Value *V2) {
- if (V1 == LHS || V1 == RHS)
- if (V2 == LHS || V2 == RHS)
- return true;
+/// Enumerate - Compare two values used by the two functions under pair-wise
+/// comparison. If this is the first time the values are seen, they're added to
+/// the mapping so that we will detect mismatches on next use.
+bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+ // Check for function @f1 referring to itself and function @f2 referring to
+ // itself, or referring to each other, or both referring to either of them.
+ // They're all equivalent if the two functions are otherwise equivalent.
+ if (V1 == F1 && V2 == F2)
+ return true;
+ if (V1 == F2 && V2 == F1)
+ return true;
- // TODO: constant expressions in terms of LHS and RHS
+ // TODO: constant expressions with GEP or references to F1 or F2.
if (isa<Constant>(V1))
return V1 == V2;
@@ -338,228 +429,138 @@
IA1->getConstraintString() == IA2->getConstraintString();
}
- // We enumerate constants globally and arguments, basic blocks or
- // instructions within the function they belong to.
- const Function *Domain1 = NULL;
- if (const Argument *A = dyn_cast<Argument>(V1)) {
- Domain1 = A->getParent();
- } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) {
- Domain1 = BB->getParent();
- } else if (const Instruction *I = dyn_cast<Instruction>(V1)) {
- Domain1 = I->getParent()->getParent();
- }
-
- const Function *Domain2 = NULL;
- if (const Argument *A = dyn_cast<Argument>(V2)) {
- Domain2 = A->getParent();
- } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) {
- Domain2 = BB->getParent();
- } else if (const Instruction *I = dyn_cast<Instruction>(V2)) {
- Domain2 = I->getParent()->getParent();
- }
-
- if (Domain1 != Domain2)
- if (Domain1 != LHS && Domain1 != RHS)
- if (Domain2 != LHS && Domain2 != RHS)
- return false;
-
- IDMap &Map1 = Domains[Domain1];
unsigned long &ID1 = Map1[V1];
if (!ID1)
- ID1 = ++DomainCount[Domain1];
+ ID1 = ++IDMap1Count;
- IDMap &Map2 = Domains[Domain2];
unsigned long &ID2 = Map2[V2];
if (!ID2)
- ID2 = ++DomainCount[Domain2];
+ ID2 = ++IDMap2Count;
return ID1 == ID2;
}
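For illustration (not part of the patch), the numbering scheme above boils down to this self-contained sketch; the names are hypothetical:

    #include <map>

    // Assign, or look up, the visitation-order number of V in one
    // function's map; the first sighting claims the next free number.
    static unsigned number(std::map<const void *, unsigned> &Map,
                           unsigned &Count, const void *V) {
      unsigned &ID = Map[V];
      if (!ID)
        ID = ++Count;
      return ID;
    }

    // Two values then match iff they were first seen at the same position:
    //   Enumerate(V1, V2)  ~=  number(Map1, N1, V1) == number(Map2, N2, V2)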
-bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) {
- BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
- BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+/// Compare - test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+ BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
do {
- if (!compare(FI, GI))
+ if (!Enumerate(F1I, F2I))
return false;
- if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) {
- const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI);
- const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI);
+ if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+ const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+ if (!GEP2)
+ return false;
- if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
return false;
if (!isEquivalentGEP(GEP1, GEP2))
return false;
} else {
- if (!isEquivalentOperation(FI, GI))
+ if (!isEquivalentOperation(F1I, F2I))
return false;
- for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
- Value *OpF = FI->getOperand(i);
- Value *OpG = GI->getOperand(i);
+ assert(F1I->getNumOperands() == F2I->getNumOperands());
+ for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+ Value *OpF1 = F1I->getOperand(i);
+ Value *OpF2 = F2I->getOperand(i);
- if (!compare(OpF, OpG))
+ if (!Enumerate(OpF1, OpF2))
return false;
- if (OpF->getValueID() != OpG->getValueID() ||
- !isEquivalentType(OpF->getType(), OpG->getType()))
+ if (OpF1->getValueID() != OpF2->getValueID() ||
+ !isEquivalentType(OpF1->getType(), OpF2->getType()))
return false;
}
}
- ++FI, ++GI;
- } while (FI != FE && GI != GE);
+ ++F1I, ++F2I;
+ } while (F1I != F1E && F2I != F2E);
- return FI == FE && GI == GE;
+ return F1I == F1E && F2I == F2E;
}
-bool MergeFunctions::equals(const Function *F, const Function *G) {
+/// Compare - test whether the two functions have equivalent behaviour.
+bool FunctionComparator::Compare() {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
- if (F->getAttributes() != G->getAttributes())
+ if (F1->getAttributes() != F2->getAttributes())
return false;
- if (F->hasGC() != G->hasGC())
+ if (F1->hasGC() != F2->hasGC())
return false;
- if (F->hasGC() && F->getGC() != G->getGC())
+ if (F1->hasGC() && F1->getGC() != F2->getGC())
return false;
- if (F->hasSection() != G->hasSection())
+ if (F1->hasSection() != F2->hasSection())
return false;
- if (F->hasSection() && F->getSection() != G->getSection())
+ if (F1->hasSection() && F1->getSection() != F2->getSection())
return false;
- if (F->isVarArg() != G->isVarArg())
+ if (F1->isVarArg() != F2->isVarArg())
return false;
// TODO: if it's internal and only used in direct calls, we could handle this
// case too.
- if (F->getCallingConv() != G->getCallingConv())
+ if (F1->getCallingConv() != F2->getCallingConv())
return false;
- if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
+ if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
return false;
- assert(F->arg_size() == G->arg_size() &&
- "Identical functions have a different number of args.");
-
- LHS = F;
- RHS = G;
+ assert(F1->arg_size() == F2->arg_size() &&
+ "Identically typed functions have different numbers of args!");
// Visit the arguments so that they get enumerated in the order they're
// passed in.
- for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
- fe = F->arg_end(); fi != fe; ++fi, ++gi) {
- if (!compare(fi, gi))
- llvm_unreachable("Arguments repeat");
- }
-
- SmallVector<const BasicBlock *, 8> FBBs, GBBs;
- SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F.
- FBBs.push_back(&F->getEntryBlock());
- GBBs.push_back(&G->getEntryBlock());
- VisitedBBs.insert(FBBs[0]);
- while (!FBBs.empty()) {
- const BasicBlock *FBB = FBBs.pop_back_val();
- const BasicBlock *GBB = GBBs.pop_back_val();
- if (!compare(FBB, GBB) || !equals(FBB, GBB)) {
- Domains.clear();
- DomainCount.clear();
- return false;
- }
- const TerminatorInst *FTI = FBB->getTerminator();
- const TerminatorInst *GTI = GBB->getTerminator();
- assert(FTI->getNumSuccessors() == GTI->getNumSuccessors());
- for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(FTI->getSuccessor(i)))
- continue;
- FBBs.push_back(FTI->getSuccessor(i));
- GBBs.push_back(GTI->getSuccessor(i));
- }
+ for (Function::const_arg_iterator f1i = F1->arg_begin(),
+ f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+ if (!Enumerate(f1i, f2i))
+ llvm_unreachable("Arguments repeat!");
}
- Domains.clear();
- DomainCount.clear();
- return true;
-}
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
-// ===----------------------------------------------------------------------===
-// Folding of functions
-// ===----------------------------------------------------------------------===
-
-// Cases:
-// * F is external strong, G is external strong:
-// turn G into a thunk to F (1)
-// * F is external strong, G is external weak:
-// turn G into a thunk to F (1)
-// * F is external weak, G is external weak:
-// unfoldable
-// * F is external strong, G is internal:
-// address of G taken:
-// turn G into a thunk to F (1)
-// address of G not taken:
-// make G an alias to F (2)
-// * F is internal, G is external weak
-// address of F is taken:
-// turn G into a thunk to F (1)
-// address of F is not taken:
-// make G an alias of F (2)
-// * F is internal, G is internal:
-// address of F and G are taken:
-// turn G into a thunk to F (1)
-// address of G is not taken:
-// make G an alias to F (2)
-//
-// alias requires linkage == (external,local,weak) fallback to creating a thunk
-// external means 'externally visible' linkage != (internal,private)
-// internal means linkage == (internal,private)
-// weak means linkage mayBeOverridable
-// being external implies that the address is taken
-//
-// 1. turn G into a thunk to F
-// 2. make G an alias to F
+ F1BBs.push_back(&F1->getEntryBlock());
+ F2BBs.push_back(&F2->getEntryBlock());
-enum LinkageCategory {
- ExternalStrong,
- ExternalWeak,
- Internal
-};
+ VisitedBBs.insert(F1BBs[0]);
+ while (!F1BBs.empty()) {
+ const BasicBlock *F1BB = F1BBs.pop_back_val();
+ const BasicBlock *F2BB = F2BBs.pop_back_val();
-static LinkageCategory categorize(const Function *F) {
- switch (F->getLinkage()) {
- case GlobalValue::InternalLinkage:
- case GlobalValue::PrivateLinkage:
- case GlobalValue::LinkerPrivateLinkage:
- return Internal;
-
- case GlobalValue::WeakAnyLinkage:
- case GlobalValue::WeakODRLinkage:
- case GlobalValue::ExternalWeakLinkage:
- case GlobalValue::LinkerPrivateWeakLinkage:
- return ExternalWeak;
-
- case GlobalValue::ExternalLinkage:
- case GlobalValue::AvailableExternallyLinkage:
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::LinkOnceODRLinkage:
- case GlobalValue::AppendingLinkage:
- case GlobalValue::DLLImportLinkage:
- case GlobalValue::DLLExportLinkage:
- case GlobalValue::CommonLinkage:
- return ExternalStrong;
- }
+ if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+ return false;
- llvm_unreachable("Unknown LinkageType.");
- return ExternalWeak;
+ const TerminatorInst *F1TI = F1BB->getTerminator();
+ const TerminatorInst *F2TI = F2BB->getTerminator();
+
+ assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+ for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ continue;
+
+ F1BBs.push_back(F1TI->getSuccessor(i));
+ F2BBs.push_back(F2TI->getSuccessor(i));
+ }
+ }
+ return true;
}
-static void ThunkGToF(Function *F, Function *G) {
+/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
+/// direct uses of G with bitcast(F). Deletes G.
+void MergeFunctions::WriteThunk(Function *F, Function *G) const {
if (!G->mayBeOverridden()) {
// Redirect direct callers of G to F.
Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
@@ -573,188 +574,212 @@
}
}
+ // If G was internal then we may have replaced all uses of G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
G->getParent());
BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
SmallVector<Value *, 16> Args;
unsigned i = 0;
const FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
- if (FFTy->getParamType(i) == AI->getType()) {
- Args.push_back(AI);
- } else {
- Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB));
- }
+ Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
++i;
}
- CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
+ CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
if (NewG->getReturnType()->isVoidTy()) {
- ReturnInst::Create(F->getContext(), BB);
- } else if (CI->getType() != NewG->getReturnType()) {
- Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
- ReturnInst::Create(F->getContext(), BCI, BB);
+ Builder.CreateRetVoid();
} else {
- ReturnInst::Create(F->getContext(), CI, BB);
+ Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
}
NewG->copyAttributesFrom(G);
NewG->takeName(G);
G->replaceAllUsesWith(NewG);
G->eraseFromParent();
-}
-static void AliasGToF(Function *F, Function *G) {
- // Darwin will trigger llvm_unreachable if asked to codegen an alias.
- return ThunkGToF(F, G);
-
-#if 0
- if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
- return ThunkGToF(F, G);
-
- GlobalAlias *GA = new GlobalAlias(
- G->getType(), G->getLinkage(), "",
- ConstantExpr::getBitCast(F, G->getType()), G->getParent());
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
- GA->takeName(G);
- GA->setVisibility(G->getVisibility());
- G->replaceAllUsesWith(GA);
- G->eraseFromParent();
-#endif
+ DEBUG(dbgs() << "WriteThunk: " << NewG->getName() << '\n');
+ ++NumThunksWritten;
}
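For illustration (not part of the patch), a rough C++ analogue of the thunk this writes: argument casts in order, one direct tail call, and a return of the (possibly cast) call result. All names are hypothetical:

    static long callee(const void *P) {      // stands in for F
      return P != 0;                         // arbitrary body
    }

    long thunk(const int *P) {               // stands in for the rebuilt G
      // cast of the argument, tail call, ret of the call's value
      return callee(static_cast<const void *>(P));
    }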
-static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
- Function *F = FnVec[i];
- Function *G = FnVec[j];
-
- LinkageCategory catF = categorize(F);
- LinkageCategory catG = categorize(G);
-
- if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
- std::swap(FnVec[i], FnVec[j]);
- std::swap(F, G);
- std::swap(catF, catG);
- }
-
- switch (catF) {
- case ExternalStrong:
- switch (catG) {
- case ExternalStrong:
- case ExternalWeak:
- ThunkGToF(F, G);
- break;
- case Internal:
- if (G->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- }
- break;
-
- case ExternalWeak: {
- assert(catG == ExternalWeak);
+/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
+/// Function G is deleted.
+void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
+ if (F->mayBeOverridden()) {
+ assert(G->mayBeOverridden());
// Make them both thunks to the same internal function.
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
F->getParent());
H->copyAttributesFrom(F);
H->takeName(F);
F->replaceAllUsesWith(H);
- ThunkGToF(F, G);
- ThunkGToF(F, H);
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
- F->setLinkage(GlobalValue::InternalLinkage);
- } break;
+ WriteThunk(F, G);
+ WriteThunk(F, H);
- case Internal:
- switch (catG) {
- case ExternalStrong:
- llvm_unreachable(0);
- // fall-through
- case ExternalWeak:
- if (F->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- case Internal: {
- bool addrTakenF = F->hasAddressTaken();
- bool addrTakenG = G->hasAddressTaken();
- if (!addrTakenF && addrTakenG) {
- std::swap(FnVec[i], FnVec[j]);
- std::swap(F, G);
- std::swap(addrTakenF, addrTakenG);
- }
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::InternalLinkage);
- if (addrTakenF && addrTakenG) {
- ThunkGToF(F, G);
- } else {
- assert(!addrTakenG);
- AliasGToF(F, G);
- }
- } break;
- } break;
+ ++NumDoubleWeak;
+ } else {
+ WriteThunk(F, G);
}
++NumFunctionsMerged;
- return true;
}
-// ===----------------------------------------------------------------------===
-// Pass definition
-// ===----------------------------------------------------------------------===
+// Insert - Insert a ComparableFunction into the FnSet, or merge it away if
+// equal to one that's already inserted.
+bool MergeFunctions::Insert(FnSetType &FnSet, ComparableFunction &NewF) {
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (Result.second)
+ return false;
-bool MergeFunctions::runOnModule(Module &M) {
- bool Changed = false;
+ const ComparableFunction &OldF = *Result.first;
- std::map<unsigned long, std::vector<Function *> > FnMap;
+ // Never thunk a strong function to a weak function.
+ assert(!OldF.getFunc()->mayBeOverridden() ||
+ NewF.getFunc()->mayBeOverridden());
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration())
- continue;
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
+ << NewF.getFunc()->getName() << '\n');
- FnMap[hash(F)].push_back(F);
- }
+ Function *DeleteF = NewF.getFunc();
+ NewF.release();
+ MergeTwoFunctions(OldF.getFunc(), DeleteF);
+ return true;
+}
+
+// IsThunk - This method determines whether or not a given Function is a thunk
+// like the ones emitted by this pass and therefore not subject to further
+// merging.
+static bool IsThunk(const Function *F) {
+  // The safe direction to fail is to return true. In that case, the function
+  // will be removed from the merging analysis. If we fail to include a
+  // function, we may try to merge unmergeable things (i.e., identical weak
+  // functions), which would push us into an infinite loop.
+
+ assert(!F->isDeclaration() && "Expected a function definition.");
+
+ const BasicBlock *BB = &F->front();
+  // A thunk is:
+  //   bitcast-inst*
+  //   optional-reg tail call @thunkee(args...*)
+  //   ret void|optional-reg
+  // where the call's operands are the incoming arguments, in order.
+
+ // Put this at the top since it triggers most often.
+ const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) return false;
+
+ // Verify that the sequence of bitcast-inst's are all casts of arguments and
+ // that there aren't any extras (ie. no repeated casts).
+ int LastArgNo = -1;
+ BasicBlock::const_iterator I = BB->begin();
+ while (const BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ const Argument *A = dyn_cast<Argument>(BCI->getOperand(0));
+ if (!A) return false;
+ if ((int)A->getArgNo() <= LastArgNo) return false;
+ LastArgNo = A->getArgNo();
+ ++I;
+ }
+
+ // Verify that we have a direct tail call and that the calling conventions
+ // and number of arguments match.
+ const CallInst *CI = dyn_cast<CallInst>(I++);
+ if (!CI || !CI->isTailCall() || !CI->getCalledFunction() ||
+ CI->getCallingConv() != CI->getCalledFunction()->getCallingConv() ||
+ CI->getNumArgOperands() != F->arg_size())
+ return false;
+
+ // Verify that the call instruction has the same arguments as this function
+ // and that they're all either the incoming argument or a cast of the right
+ // argument.
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+ const Value *V = CI->getArgOperand(i);
+ const Argument *A = dyn_cast<Argument>(V);
+ if (!A) {
+ const BitCastInst *BCI = dyn_cast<BitCastInst>(V);
+ if (!BCI) return false;
+ A = cast<Argument>(BCI->getOperand(0));
+ }
+ if (A->getArgNo() != i) return false;
+ }
+
+ // Verify that the terminator is a ret void (if we're void) or a ret of the
+ // call's return, or a ret of a bitcast of the call's return.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ ++I;
+ if (BCI->getOperand(0) != CI) return false;
+ }
+ if (RI != I) return false;
+ if (RI->getNumOperands() == 0)
+ return CI->getType()->isVoidTy();
+ return RI->getReturnValue() == CI;
+}
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
TD = getAnalysisIfAvailable<TargetData>();
bool LocalChanged;
do {
+ DEBUG(dbgs() << "size of module: " << M.size() << '\n');
LocalChanged = false;
- DEBUG(dbgs() << "size: " << FnMap.size() << "\n");
- for (std::map<unsigned long, std::vector<Function *> >::iterator
- I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
- std::vector<Function *> &FnVec = I->second;
- DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
-
- for (int i = 0, e = FnVec.size(); i != e; ++i) {
- for (int j = i + 1; j != e; ++j) {
- bool isEqual = equals(FnVec[i], FnVec[j]);
-
- DEBUG(dbgs() << " " << FnVec[i]->getName()
- << (isEqual ? " == " : " != ")
- << FnVec[j]->getName() << "\n");
-
- if (isEqual) {
- if (fold(FnVec, i, j)) {
- LocalChanged = true;
- FnVec.erase(FnVec.begin() + j);
- --j, --e;
- }
- }
- }
+ FnSetType FnSet;
+
+ // Insert only strong functions and merge them. Strong function merging
+ // always deletes one of them.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
+ Function *F = I++;
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ !F->mayBeOverridden() && !IsThunk(F)) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ LocalChanged |= Insert(FnSet, CF);
}
+ }
+  // Insert only weak functions and merge them. By doing them second we
+  // create thunks to the strong function when possible. When two weak
+  // functions are identical, we create a new strong function with two weak
+  // thunks to it which are identical but not mergeable.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
+ Function *F = I++;
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ F->mayBeOverridden() && !IsThunk(F)) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ LocalChanged |= Insert(FnSet, CF);
+ }
}
+ DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
Changed |= LocalChanged;
} while (LocalChanged);
return Changed;
}
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS) {
+ if (LHS.getFunc() == RHS.getFunc() &&
+ LHS.getHash() == RHS.getHash())
+ return true;
+ if (!LHS.getFunc() || !RHS.getFunc())
+ return false;
+ assert(LHS.getTD() == RHS.getTD() &&
+ "Comparing functions for different targets");
+ return FunctionComparator(LHS.getTD(),
+ LHS.getFunc(), RHS.getFunc()).Compare();
+}
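For illustration (not part of the patch), the net effect of MergeTwoFunctions on a pair of equal overridable definitions, sketched with hypothetical names:

    // Before:  weak T f(...) { body }         weak T g(...) { body }
    // After:   internal T fbody(...) { body } // f's body, made internal
    //          weak T f(...) { tail call fbody }  // H, taking f's name
    //          weak T g(...) { tail call fbody }
    // IsThunk() then keeps both thunks out of later merging rounds.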
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialInlining.cpp Tue Oct 26 19:48:03 2010
@@ -30,7 +30,9 @@
struct PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
static char ID; // Pass identification, replacement for typeid
- PartialInliner() : ModulePass(&ID) {}
+ PartialInliner() : ModulePass(ID) {
+ initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module& M);
@@ -41,7 +43,7 @@
char PartialInliner::ID = 0;
INITIALIZE_PASS(PartialInliner, "partial-inliner",
- "Partial Inliner", false, false);
+ "Partial Inliner", false, false)
ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
@@ -67,8 +69,9 @@
return 0;
// Clone the function, so that we can hack away on it.
- ValueMap<const Value*, Value*> VMap;
- Function* duplicateFunction = CloneFunction(F, VMap);
+ ValueToValueMapTy VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(duplicateFunction);
BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PartialSpecialization.cpp Tue Oct 26 19:48:03 2010
@@ -25,6 +25,7 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CallSite.h"
#include "llvm/ADT/DenseSet.h"
@@ -37,47 +38,49 @@
// Maximum number of arguments that can be marked interesting
static const int MaxInterests = 6;
-// Call must be used at least occasionally
-static const int CallsMin = 5;
-
-// Must have 10% of calls having the same constant to specialize on
-static const double ConstValPercent = .1;
-
namespace {
typedef SmallVector<int, MaxInterests> InterestingArgVector;
class PartSpec : public ModulePass {
void scanForInterest(Function&, InterestingArgVector&);
int scanDistribution(Function&, int, std::map<Constant*, int>&);
+ InlineCostAnalyzer CA;
public :
static char ID; // Pass identification, replacement for typeid
- PartSpec() : ModulePass(&ID) {}
+ PartSpec() : ModulePass(ID) {
+ initializePartSpecPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
};
}
char PartSpec::ID = 0;
INITIALIZE_PASS(PartSpec, "partialspecialization",
- "Partial Specialization", false, false);
+ "Partial Specialization", false, false)
// Specialize F by replacing the arguments (keys) in replacements with the
// constants (values). Replace all calls to F with those constants with
// a call to the specialized function. Returns the specialized function
static Function*
SpecializeFunction(Function* F,
- ValueMap<const Value*, Value*>& replacements) {
+ ValueToValueMapTy& replacements) {
// arg numbers of deleted arguments
DenseMap<unsigned, const Argument*> deleted;
- for (ValueMap<const Value*, Value*>::iterator
+ for (ValueToValueMapTy::iterator
repb = replacements.begin(), repe = replacements.end();
repb != repe; ++repb) {
Argument const *arg = cast<const Argument>(repb->first);
deleted[arg->getArgNo()] = arg;
}
- Function* NF = CloneFunction(F, replacements);
+ Function* NF = CloneFunction(F, replacements,
+ /*ModuleLevelChanges=*/false);
NF->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(NF);
+ // FIXME: Specialized versions getting the same constants should also get
+ // the same name. That way, specializations for public functions can be
+ // marked linkonce_odr and reused across modules.
+
for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
ii != ee; ) {
Value::use_iterator i = ii;
@@ -148,22 +151,37 @@
bool breakOuter = false;
for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
std::map<Constant*, int> distribution;
- int total = scanDistribution(F, interestingArgs[x], distribution);
- if (total > CallsMin)
- for (std::map<Constant*, int>::iterator ii = distribution.begin(),
- ee = distribution.end(); ii != ee; ++ii)
- if (total > ii->second && ii->first &&
- ii->second > total * ConstValPercent) {
- ValueMap<const Value*, Value*> m;
- Function::arg_iterator arg = F.arg_begin();
- for (int y = 0; y < interestingArgs[x]; ++y)
- ++arg;
- m[&*arg] = ii->first;
- SpecializeFunction(&F, m);
- ++numSpecialized;
- breakOuter = true;
- Changed = true;
- }
+ scanDistribution(F, interestingArgs[x], distribution);
+ for (std::map<Constant*, int>::iterator ii = distribution.begin(),
+ ee = distribution.end(); ii != ee; ++ii) {
+ // The distribution map might have an entry for NULL (i.e., one or more
+ // callsites were passing a non-constant there). We allow that to
+ // happen so that we can see whether any callsites pass a non-constant;
+ // if none do and the function is internal, we might have an opportunity
+ // to kill the original function.
+ if (!ii->first) continue;
+ int bonus = ii->second;
+ SmallVector<unsigned, 1> argnos;
+ argnos.push_back(interestingArgs[x]);
+ InlineCost cost = CA.getSpecializationCost(&F, argnos);
+ // FIXME: If this is the last constant entry, and no non-constant
+ // entries exist, and the target function is internal, the cost should
+ // be reduced by the original size of the target function, almost
+ // certainly making it negative and causing a specialization that will
+ // leave the original function dead and removable.
+ if (cost.isAlways() ||
+ (cost.isVariable() && cost.getValue() < bonus)) {
+ ValueToValueMapTy m;
+ Function::arg_iterator arg = F.arg_begin();
+ for (int y = 0; y < interestingArgs[x]; ++y)
+ ++arg;
+ m[&*arg] = ii->first;
+ SpecializeFunction(&F, m);
+ ++numSpecialized;
+ breakOuter = true;
+ Changed = true;
+ }
+ }
}
}
return Changed;
@@ -174,28 +192,20 @@
void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
ii != ee; ++ii) {
- for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
- ui != ue; ++ui) {
-
- bool interesting = false;
- User *U = *ui;
- if (isa<CmpInst>(U)) interesting = true;
- else if (isa<CallInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<InvokeInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<SwitchInst>(U)) interesting = true;
- else if (isa<BranchInst>(U)) interesting = true;
-
- if (interesting) {
- args.push_back(std::distance(F.arg_begin(), ii));
- break;
- }
+ int argno = std::distance(F.arg_begin(), ii);
+ SmallVector<unsigned, 1> argnos;
+ argnos.push_back(argno);
+ int bonus = CA.getSpecializationBonus(&F, argnos);
+ if (bonus > 0) {
+ args.push_back(argno);
}
}
}
/// scanDistribution - Construct a histogram of the constants passed to
/// argument arg of F.
+/// For each distinct constant, we'll compute the total of the specialization
+/// bonus across all callsites passing that constant; if that total exceeds
+/// the specialization cost, we will create the specialization.
int PartSpec::scanDistribution(Function& F, int arg,
std::map<Constant*, int>& dist) {
bool hasIndirect = false;
@@ -205,7 +215,10 @@
User *U = *ii;
CallSite CS(U);
if (CS && CS.getCalledFunction() == &F) {
- ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
+ SmallVector<unsigned, 1> argnos;
+ argnos.push_back(arg);
+ dist[dyn_cast<Constant>(CS.getArgument(arg))] +=
+ CA.getSpecializationBonus(&F, argnos);
++total;
} else
hasIndirect = true;
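For illustration (not part of the patch), the per-constant decision the loop above implements reduces to this sketch; it assumes the file's own InlineCost.h include, and the helper name is hypothetical:

    // Specialize on one constant only when the summed per-callsite bonus
    // collected by scanDistribution beats the estimated specialization cost.
    static bool shouldSpecialize(const InlineCost &Cost, int SummedBonus) {
      return Cost.isAlways() ||
             (Cost.isVariable() && Cost.getValue() < SummedBonus);
    }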
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/PruneEH.cpp Tue Oct 26 19:48:03 2010
@@ -37,7 +37,9 @@
namespace {
struct PruneEH : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- PruneEH() : CallGraphSCCPass(&ID) {}
+ PruneEH() : CallGraphSCCPass(ID) {
+ initializePruneEHPass(*PassRegistry::getPassRegistry());
+ }
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -48,8 +50,11 @@
}
char PruneEH::ID = 0;
-INITIALIZE_PASS(PruneEH, "prune-eh",
- "Remove unused exception handling info", false, false);
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StripDeadPrototypes.cpp Tue Oct 26 19:48:03 2010
@@ -29,7 +29,9 @@
class StripDeadPrototypesPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(&ID) { }
+ StripDeadPrototypesPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
};
@@ -37,7 +39,7 @@
char StripDeadPrototypesPass::ID = 0;
INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
- "Strip Unused Function Prototypes", false, false);
+ "Strip Unused Function Prototypes", false, false)
bool StripDeadPrototypesPass::runOnModule(Module &M) {
bool MadeChange = false;
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StripSymbols.cpp Tue Oct 26 19:48:03 2010
@@ -39,7 +39,9 @@
public:
static char ID; // Pass identification, replacement for typeid
explicit StripSymbols(bool ODI = false)
- : ModulePass(&ID), OnlyDebugInfo(ODI) {}
+ : ModulePass(ID), OnlyDebugInfo(ODI) {
+ initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -52,7 +54,9 @@
public:
static char ID; // Pass identification, replacement for typeid
explicit StripNonDebugSymbols()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {
+ initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -65,7 +69,9 @@
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDebugDeclare()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {
+ initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -78,7 +84,9 @@
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDeadDebugInfo()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {
+ initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -90,7 +98,7 @@
char StripSymbols::ID = 0;
INITIALIZE_PASS(StripSymbols, "strip",
- "Strip all symbols from a module", false, false);
+ "Strip all symbols from a module", false, false)
ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
return new StripSymbols(OnlyDebugInfo);
@@ -99,7 +107,7 @@
char StripNonDebugSymbols::ID = 0;
INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
"Strip all symbols, except dbg symbols, from a module",
- false, false);
+ false, false)
ModulePass *llvm::createStripNonDebugSymbolsPass() {
return new StripNonDebugSymbols();
@@ -107,7 +115,7 @@
char StripDebugDeclare::ID = 0;
INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
- "Strip all llvm.dbg.declare intrinsics", false, false);
+ "Strip all llvm.dbg.declare intrinsics", false, false)
ModulePass *llvm::createStripDebugDeclarePass() {
return new StripDebugDeclare();
@@ -115,7 +123,7 @@
char StripDeadDebugInfo::ID = 0;
INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
- "Strip debug info for unused symbols", false, false);
+ "Strip debug info for unused symbols", false, false)
ModulePass *llvm::createStripDeadDebugInfoPass() {
return new StripDeadDebugInfo();
@@ -350,8 +358,8 @@
for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
E = MDs.end(); I != E; ++I) {
- if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(),
- true)) {
+ GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+ if (GV && M.getGlobalVariable(GV->getName(), true)) {
if (!NMD)
NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
NMD->addOperand(*I);
Modified: llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/IPO/StructRetPromotion.cpp Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===//
+//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -50,20 +50,24 @@
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
- SRETPromotion() : CallGraphSCCPass(&ID) {}
+ SRETPromotion() : CallGraphSCCPass(ID) {
+ initializeSRETPromotionPass(*PassRegistry::getPassRegistry());
+ }
private:
CallGraphNode *PromoteReturn(CallGraphNode *CGN);
bool isSafeToUpdateAllCallers(Function *F);
Function *cloneFunctionBody(Function *F, const StructType *STy);
CallGraphNode *updateCallSites(Function *F, Function *NF);
- bool nestedStructType(const StructType *STy);
};
}
char SRETPromotion::ID = 0;
-INITIALIZE_PASS(SRETPromotion, "sretpromotion",
- "Promote sret arguments to multiple ret values", false, false);
+INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false)
Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
@@ -354,14 +358,3 @@
return NF_CGN;
}
-/// nestedStructType - Return true if STy includes any
-/// other aggregate types
-bool SRETPromotion::nestedStructType(const StructType *STy) {
- unsigned Num = STy->getNumElements();
- for (unsigned i = 0; i < Num; i++) {
- const Type *Ty = STy->getElementType(i);
- if (!Ty->isSingleValueType() && !Ty->isVoidTy())
- return true;
- }
- return false;
-}
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -13,5 +13,3 @@
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
-
-target_link_libraries (LLVMInstCombine LLVMTransformUtils)
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombine.h Tue Oct 26 19:48:03 2010
@@ -81,7 +81,9 @@
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+ }
public:
virtual bool runOnFunction(Function &F);
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Tue Oct 26 19:48:03 2010
@@ -207,15 +207,26 @@
}
break;
case Instruction::Or:
- if (Together == AndRHS) // (X | C) & C --> C
- return ReplaceInstUsesWith(TheAnd, AndRHS);
-
- if (Op->hasOneUse() && Together != OpRHS) {
- // (X | C1) & C2 --> (X | (C1&C2)) & C2
- Value *Or = Builder->CreateOr(X, Together);
- Or->takeName(Op);
- return BinaryOperator::CreateAnd(Or, AndRHS);
+ if (Op->hasOneUse()){
+ if (Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+
+ ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+ if (TogetherCI && !TogetherCI->isZero()){
+ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+ // NOTE: This reduces the number of bits set in the & mask, which
+ // can expose opportunities for store narrowing.
+ Together = ConstantExpr::getXor(AndRHS, Together);
+ Value *And = Builder->CreateAnd(X, Together);
+ And->takeName(Op);
+ return BinaryOperator::CreateOr(And, OpRHS);
+ }
}
+
break;
case Instruction::Add:
if (Op->hasOneUse()) {
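As a quick numeric spot-check of the two (X | C1) & C2 rewrites above (not part of the patch; the constants are arbitrary examples):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X != 256; ++X) {
        // (X | C1) & C2 --> (X | (C1 & C2)) & C2, for any C1/C2:
        const uint32_t C1 = 0xF3, C2 = 0x3F;
        assert(((X | C1) & C2) == ((X | (C1 & C2)) & C2));
        // (X | C1) & C2 --> (X & (C2 ^ (C1 & C2))) | C1, which applies
        // when Together == OpRHS, i.e. (C1 & C2) == C1, as with D1/D2:
        const uint32_t D1 = 0x0F, D2 = 0x3F;
        assert(((X | D1) & D2) == ((X & (D2 ^ (D1 & D2))) | D1));
      }
      return 0;
    }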
@@ -434,6 +445,270 @@
return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it was proven that (A & C) == C. This
+/// is trivial if C == A or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares that the comparison is true only
+/// if (A & B) == A, i.e., all bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares that the comparison is true only
+/// if (A & B) == 0, i.e., all bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares that (A & B) == C, where C may contain
+/// any combination of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The part "Not" means that in the above descriptions "==" should be
+/// replaced by "!=".
+/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following equivalences hold:
+/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+ FoldMskICmp_AMask_AllOnes = 1,
+ FoldMskICmp_AMask_NotAllOnes = 2,
+ FoldMskICmp_BMask_AllOnes = 4,
+ FoldMskICmp_BMask_NotAllOnes = 8,
+ FoldMskICmp_Mask_AllZeroes = 16,
+ FoldMskICmp_Mask_NotAllZeroes = 32,
+ FoldMskICmp_AMask_Mixed = 64,
+ FoldMskICmp_AMask_NotMixed = 128,
+ FoldMskICmp_BMask_Mixed = 256,
+ FoldMskICmp_BMask_NotMixed = 512
+};
+
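The single-bit equivalences at the end of the comment above are easy to spot-check (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t A = 0x8;                    // a single-bit mask
      for (uint32_t B = 0; B != 64; ++B) {
        assert(((A & B) == A) == ((A & B) != 0));
        assert(((A & B) != A) == ((A & B) == 0));
      }
      return 0;
    }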
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+ ICmpInst::Predicate SCC)
+{
+ ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+ bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ ACst->getValue().isPowerOf2());
+ bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ BCst->getValue().isPowerOf2());
+ unsigned result = 0;
+ if (CCst != 0 && CCst->isZero()) {
+ // if C is zero, then both A and B qualify as mask
+    result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_AMask_Mixed |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_AMask_NotMixed |
+                          FoldMskICmp_BMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed));
+ return result;
+ }
+ if (A == C) {
+ result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed)
+ : (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed));
+ }
+ else if (ACst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+ : FoldMskICmp_AMask_NotMixed);
+ }
+ if (B == C)
+ {
+ result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_BMask_Mixed));
+ }
+ else if (BCst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+ : FoldMskICmp_BMask_NotMixed);
+ }
+ return result;
+}
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+ Value*& B, Value*& C,
+ Value*& D, Value*& E,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
+ if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+ // vectors are not (yet?) supported
+ if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+ // Here comes the tricky part:
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // and L11 & L12 == L21 & L22. The same goes for RHS.
+  // Now we must find those components L** and R** that are equal, so
+  // that we can extract the parameters A, B, C, D, and E for the canonical
+  // form above.
+ Value *L1 = LHS->getOperand(0);
+ Value *L2 = LHS->getOperand(1);
+ Value *L11,*L12,*L21,*L22;
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ }
+ else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
+ L21 = L22 = 0;
+ }
+
+ Value *R1 = RHS->getOperand(0);
+ Value *R2 = RHS->getOperand(1);
+ Value *R11,*R12;
+ bool ok = false;
+ if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R2; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R2; ok = true;
+ }
+ }
+ if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R1; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R1; ok = true;
+ }
+ else
+ return 0;
+ }
+ if (!ok)
+ return 0;
+
+ if (L11 == A) {
+ B = L12; C = L2;
+ }
+ else if (L12 == A) {
+ B = L11; C = L2;
+ }
+ else if (L21 == A) {
+ B = L22; C = L1;
+ }
+ else if (L22 == A) {
+ B = L21; C = L1;
+ }
+
+ unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+ unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ return left_type & right_type;
+}
+/// foldLogOpOfMaskedICmps:
+/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y)
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate NEWCC,
+ llvm::InstCombiner::BuilderTy* Builder) {
+ Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+ if (mask == 0) return 0;
+
+ if (NEWCC == ICmpInst::ICMP_NE)
+ mask >>= 1; // treat "Not"-states as normal states
+
+ if (mask & FoldMskICmp_Mask_AllZeroes) {
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // -> (icmp eq (A & (B|D)), 0)
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+    // We can't use C as the zero constant here, because we might actually
+    // handle (icmp ne (A & B), B) & (icmp ne (A & D), D)
+    // where B and D each have a single bit set.
+ Value* zero = Constant::getNullValue(A->getType());
+ return Builder->CreateICmp(NEWCC, newAnd, zero);
+ }
+ else if (mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr);
+ }
+ else if (mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ // -> (icmp eq (A & (B&D)), A)
+ Value* newAnd1 = Builder->CreateAnd(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newAnd1);
+ return Builder->CreateICmp(NEWCC, newAnd, A);
+ }
+ else if (mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+    // If we can prove that (B & D) & (C ^ E) == 0, that is, that the bits of
+    // C and E covered by both masks B and D agree, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+    // We can't simply use C and E, because we might actually handle
+    // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+    // where B and D each have a single bit set.
+
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (CCst == 0) return 0;
+ if (LHS->getPredicate() != NEWCC)
+ CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (ECst == 0) return 0;
+ if (RHS->getPredicate() != NEWCC)
+ ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+ ConstantInt* MCst = dyn_cast<ConstantInt>(
+ ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+ ConstantExpr::getXor(CCst, ECst)) );
+ // if there is a conflict we should actually return a false for the
+ // whole construct
+ if (!MCst->isZero())
+ return 0;
+ Value* newOr1 = Builder->CreateOr(B, D);
+ Value* newOr2 = ConstantExpr::getOr(CCst, ECst);
+ Value* newAnd = Builder->CreateAnd(A, newOr1);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+ }
+ return 0;
+}
+
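As a numeric spot-check of three of the folds above over small masks (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A != 64; ++A)
        for (uint32_t B = 0; B != 16; ++B)
          for (uint32_t D = 0; D != 16; ++D) {
            // Mask_AllZeroes:
            // (icmp eq (A&B),0) & (icmp eq (A&D),0) -> (icmp eq (A&(B|D)),0)
            assert((((A & B) == 0) && ((A & D) == 0)) ==
                   ((A & (B | D)) == 0));
            // BMask_AllOnes:
            // (icmp eq (A&B),B) & (icmp eq (A&D),D) -> (icmp eq (A&(B|D)),B|D)
            assert((((A & B) == B) && ((A & D) == D)) ==
                   ((A & (B | D)) == (B | D)));
            // AMask_AllOnes:
            // (icmp eq (A&B),A) & (icmp eq (A&D),A) -> (icmp eq (A&(B&D)),A)
            assert((((A & B) == A) && ((A & D) == A)) ==
                   ((A & (B & D)) == A));
          }
      return 0;
    }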
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -451,6 +726,13 @@
return getICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
+
+ {
+ // handle (roughly):
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ Value* fold = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder);
+ if (fold) return fold;
+ }
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
@@ -1145,17 +1427,25 @@
}
}
+ {
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ Value* fold = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder);
+ if (fold) return fold;
+ }
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
if (LHSCst == 0 || RHSCst == 0) return 0;
- // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- if (LHSCst == RHSCst && LHSCC == RHSCC &&
- LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
}
// From here on, we only handle:
@@ -1667,6 +1957,18 @@
}
}
+ // Note: If we've gotten to the point of visiting the outer OR, then the
+ // inner one couldn't be simplified. If it was a constant, then it won't
+ // be simplified by a later pass either, so we try swapping the inner/outer
+ // ORs in the hopes that we'll be able to simplify it this way.
+ // (X|C) | V --> (X|V) | C
+ if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ Value *Inner = Builder->CreateOr(A, Op1);
+ Inner->takeName(Op0);
+ return BinaryOperator::CreateOr(Inner, C1);
+ }
+
return Changed ? &I : 0;
}
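The inner/outer OR swap above relies only on | being associative and commutative; a trivial check (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 0x2A;                     // arbitrary constant
      for (uint32_t X = 0; X != 16; ++X)
        for (uint32_t V = 0; V != 16; ++V)
          assert(((X | C) | V) == ((X | V) | C));  // (X|C) | V --> (X|V) | C
      return 0;
    }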
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Oct 26 19:48:03 2010
@@ -109,10 +109,9 @@
TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
- unsigned MaxAlign = Value::MaximumAlignment;
// LLVM doesn't support alignments larger than this currently.
- Align = std::min(Align, MaxAlign);
+ Align = std::min(Align, +Value::MaximumAlignment);
if (PrefAlign > Align)
Align = EnforceKnownAlignment(V, Align, PrefAlign);
@@ -281,7 +280,8 @@
// memmove/cpy/set of zero bytes is a noop.
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
- if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
if (CI->getZExtValue() == 1) {
@@ -290,6 +290,10 @@
// alignment is sufficient.
}
}
+
+ // No other transformations apply to volatile transfers.
+ if (MI->isVolatile())
+ return 0;
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
@@ -539,7 +543,7 @@
// X + 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
- UndefValue::get(II->getCalledValue()->getType()),
+ UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
@@ -698,6 +702,32 @@
}
break;
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned MemAlign = GetOrEnforceKnownAlignment(II->getArgOperand(0));
+ unsigned AlignArg = II->getNumArgOperands() - 1;
+ ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+ if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+ II->setArgOperand(AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ MemAlign, false));
+ return II;
+ }
+ break;
+ }
+
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
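
One way to picture the new NEON alignment refinement above: when the pointer
operand is known to be more aligned than the intrinsic's trailing alignment
argument claims, that argument is raised in place. A hedged sketch, assuming a
vld1 variant whose last i32 operand is the alignment (the names and types here
are illustrative, not taken from the patch):

  ; %p is known 16-byte aligned, but the call only claims alignment 1.
  %v = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %p, i32 1)
  ; visitCallInst rewrites the alignment operand to the known value:
  %v = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %p, i32 16)
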
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineCasts.cpp Tue Oct 26 19:48:03 2010
@@ -396,6 +396,11 @@
case Instruction::Trunc:
// trunc(trunc(x)) -> trunc(x)
return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
@@ -454,6 +459,29 @@
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ Src->hasOneUse()) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
return 0;
}
@@ -538,8 +566,7 @@
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
- else
- return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
}
}
}
@@ -1112,7 +1139,7 @@
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant* SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
Callee->getAttributes(),
Builder->getFloatTy(),
Builder->getFloatTy(),
@@ -1120,6 +1147,11 @@
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
return ret;
}
}
@@ -1335,6 +1367,199 @@
return new ShuffleVectorInst(InVal, V2, Mask);
}
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ const Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+  // If we got down to a value of the right type, we win: try inserting it
+  // into the right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+  // If we succeeded, we know that all of the elements are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
+ }
+
+  // bitcast(trunc(lshr(bitcast(somevector), cst)))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
@@ -1386,6 +1611,11 @@
((Instruction*)NULL));
}
}
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
@@ -1395,16 +1625,24 @@
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
- // If this is a cast from an integer to vector, check to see if the input
- // is a trunc or zext of a bitcast from vector. If so, we can replace all
- // the casts with a shuffle and (potentially) a bitcast.
- if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
- CastInst *SrcCast = cast<CastInst>(Src);
- if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
- if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
cast<VectorType>(DestTy), *this))
- return I;
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
}
}
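
As a concrete instance of the trunc(lshr(zext A), Cst) fold added above,
consider this sketch (the i32/i64/i16 widths are assumed for illustration):

  ; Before: the shift is performed in the wide type.
  %z = zext i32 %a to i64
  %s = lshr i64 %z, 8
  %t = trunc i64 %s to i16
  ; After: the shift amount (8) is less than ASize (32), so the shift is safe
  ; in A's own type, and one of the casts disappears.
  %s2 = lshr i32 %a, 8
  %t  = trunc i32 %s2 to i16
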
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp Tue Oct 26 19:48:03 2010
@@ -146,10 +146,14 @@
if (TD) {
unsigned KnownAlign =
GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
- if (KnownAlign >
- (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
- LI.getAlignment()))
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+ TD->getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
}
// load (cast X) --> cast (load X) iff safe.
@@ -326,7 +330,9 @@
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
- return new StoreInst(NewCast, CastOp);
+ SI.setOperand(0, NewCast);
+ SI.setOperand(1, CastOp);
+ return &SI;
}
/// equivalentAddressValues - Test if A and B will obviously have the same
@@ -411,10 +417,14 @@
if (TD) {
unsigned KnownAlign =
GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
- if (KnownAlign >
- (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
- SI.getAlignment()))
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+ TD->getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
}
// Do really simple DSE, to catch cases where there are several consecutive
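
A side effect of the alignment rewrite above worth calling out: a load or
store that carried no explicit alignment now gets its effective (ABI)
alignment recorded on the instruction. A small sketch, assuming a target
where i32 has 4-byte ABI alignment:

  ; Before instcombine: alignment 0 means "use the ABI alignment".
  %x = load i32* %p
  ; After: the same alignment, now explicit on the instruction.
  %x = load i32* %p, align 4
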
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstCombineShifts.cpp Tue Oct 26 19:48:03 2010
@@ -56,10 +56,270 @@
return 0;
}
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64 bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI;
+ if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+    // Bitwise operators can all be arbitrarily evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+ case Instruction::Shl: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) return true;
+
+ // We can always turn shl(c)+shr(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned HighBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(TypeWidth, HighBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::LShr: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) return true;
+
+ // We can always turn lshr(c)+shl(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, LowBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this function inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+    // Bitwise operators can all be arbitrarily evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ return I;
+
+ case Instruction::Shl: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+ case Instruction::LShr: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+
+ case Instruction::Select:
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+ NumBits, isLeftShift, IC));
+ return PN;
+ }
+ }
+}
+
+
+
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
+
+
+  // See if we can propagate this shift into the input; this covers the trivial
+  // case of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return ReplaceInstUsesWith(I,
+ GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ }
+
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
@@ -288,39 +548,17 @@
ConstantInt::get(Ty, AmtSum));
}
- if (ShiftOp->getOpcode() == Instruction::LShr &&
- I.getOpcode() == Instruction::AShr) {
- if (AmtSum >= TypeBits)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
- return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
- }
-
- if (ShiftOp->getOpcode() == Instruction::AShr &&
- I.getOpcode() == Instruction::LShr) {
- // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
- if (AmtSum >= TypeBits)
- AmtSum = TypeBits-1;
-
- Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(I.getContext(), Mask));
- }
-
- // Okay, if we get here, one shift must be left, and the other shift must be
- // right. See if the amounts are equal.
if (ShiftAmt1 == ShiftAmt2) {
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
- if (I.getOpcode() == Instruction::Shl) {
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(),Mask));
}
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
- if (I.getOpcode() == Instruction::LShr) {
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(), Mask));
@@ -329,7 +567,8 @@
uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
// (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
@@ -340,7 +579,8 @@
}
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
@@ -355,9 +595,8 @@
uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
// (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
- assert(ShiftOp->getOpcode() == Instruction::LShr ||
- ShiftOp->getOpcode() == Instruction::AShr);
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
ConstantInt::get(Ty, ShiftDiff));
@@ -367,8 +606,8 @@
}
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
- assert(ShiftOp->getOpcode() == Instruction::Shl);
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
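
To tie the new CanEvaluateShifted/GetShiftedValue machinery to a concrete
case: an equal-amount shl/lshr pair collapses to a mask, and the mask itself
folds away once the masked-off bits are known zero. A sketch, assuming %a has
its high 64 bits clear (say, it was zext'd from an i64):

  ; Before: a round trip through the high half of an i128.
  %c = shl i128 %a, 64
  %f = lshr i128 %c, 64
  ; After: the pair becomes an 'and' with the low-64-bit mask...
  %f = and i128 %a, 18446744073709551615
  ; ...which is a no-op here, so %f simplifies to just %a.
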
Modified: llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/InstCombine/InstructionCombining.cpp Tue Oct 26 19:48:03 2010
@@ -48,6 +48,7 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm-c/Initialization.h"
#include <algorithm>
#include <climits>
using namespace llvm;
@@ -58,10 +59,18 @@
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstCombine(*unwrap(R));
+}
char InstCombiner::ID = 0;
INITIALIZE_PASS(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false);
+ "Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LCSSAID);
@@ -1023,10 +1032,8 @@
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
-
- std::vector<Instruction*> InstrsForInstCombineWorklist;
- InstrsForInstCombineWorklist.reserve(128);
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
do {
Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -1,5 +1,6 @@
add_llvm_library(LLVMInstrumentation
EdgeProfiling.cpp
+ Instrumentation.cpp
OptimalEdgeProfiling.cpp
ProfilingUtils.cpp
)
Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp Tue Oct 26 19:48:03 2010
@@ -34,7 +34,9 @@
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(&ID) {}
+ EdgeProfiler() : ModulePass(ID) {
+ initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
virtual const char *getPassName() const {
return "Edge Profiler";
@@ -44,7 +46,7 @@
char EdgeProfiler::ID = 0;
INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
- "Insert instrumentation for edge profiling", false, false);
+ "Insert instrumentation for edge profiling", false, false)
ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(&ID) {}
+ OptimalEdgeProfiler() : ModulePass(ID) {
+ initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(ProfileEstimatorPassID);
@@ -50,9 +52,14 @@
}
char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
"Insert optimal instrumentation for edge profiling",
- false, false);
+ false, false)
ModulePass *llvm::createOptimalEdgeProfilerPass() {
return new OptimalEdgeProfiler();
Removed: llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ABCD.cpp (removed)
@@ -1,1113 +0,0 @@
-//===------- ABCD.cpp - Removes redundant conditional branches ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass removes redundant branch instructions. This algorithm was
-// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper
-// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original
-// Algorithm was created to remove array bound checks for strongly typed
-// languages. This implementation expands the idea and removes any conditional
-// branches that can be proved redundant, not only those used in array bound
-// checks. With the SSI representation, each variable has a
-// constraint. By analyzing these constraints we can prove that a branch is
-// redundant. When a branch is proved redundant it means that
-// one direction will always be taken; thus, we can change this branch into an
-// unconditional jump.
-// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination
-// after ABCD to clean up the code.
-// This implementation was created based on the implementation of the ABCD
-// algorithm implemented for the compiler Jitrino.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "abcd"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-
-using namespace llvm;
-
-STATISTIC(NumBranchTested, "Number of conditional branches analyzed");
-STATISTIC(NumBranchRemoved, "Number of conditional branches removed");
-
-namespace {
-
-class ABCD : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
- ABCD() : FunctionPass(&ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SSI>();
- }
-
- bool runOnFunction(Function &F);
-
- private:
- /// Keep track of whether we've modified the program yet.
- bool modified;
-
- enum ProveResult {
- False = 0,
- Reduced = 1,
- True = 2
- };
-
- typedef ProveResult (*meet_function)(ProveResult, ProveResult);
- static ProveResult max(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::max(res1, res2);
- }
- static ProveResult min(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::min(res1, res2);
- }
-
- class Bound {
- public:
- Bound(APInt v, bool upper) : value(v), upper_bound(upper) {}
- Bound(const Bound &b, int cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
- Bound(const Bound &b, const APInt &cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
-
- /// Test if Bound is an upper bound
- bool isUpperBound() const { return upper_bound; }
-
- /// Get the bitwidth of this bound
- int32_t getBitWidth() const { return value.getBitWidth(); }
-
- /// Creates a Bound incrementing the one received
- static Bound createIncrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value+1 : b.value-1,
- b.upper_bound);
- }
-
- /// Creates a Bound decrementing the one received
- static Bound createDecrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value-1 : b.value+1,
- b.upper_bound);
- }
-
- /// Test if two bounds are equal
- static bool eq(const Bound *a, const Bound *b) {
- if (!a || !b) return false;
-
- assert(a->isUpperBound() == b->isUpperBound());
- return a->value == b->value;
- }
-
- /// Test if val is less than or equal to Bound b
- static bool leq(APInt val, const Bound &b) {
- return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value);
- }
-
-    /// Test if Bound a is less than or equal to Bound b
- static bool leq(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.sle(b.value) :
- a.value.sge(b.value);
- }
-
-    /// Test if Bound a is less than Bound b
- static bool lt(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.slt(b.value) :
- a.value.sgt(b.value);
- }
-
-    /// Test if Bound b is greater than or equal to val
- static bool geq(const Bound &b, APInt val) {
- return leq(val, b);
- }
-
-    /// Test if Bound a is greater than or equal to Bound b
- static bool geq(const Bound &a, const Bound &b) {
- return leq(b, a);
- }
-
- private:
- APInt value;
- bool upper_bound;
- };
-
-  /// This class is used to store results for some parts of the graph,
-  /// so information does not need to be recalculated. The maximum false,
-  /// minimum true, and minimum reduced results are stored.
- class MemoizedResultChart {
- public:
- MemoizedResultChart() {}
- MemoizedResultChart(const MemoizedResultChart &other) {
- if (other.max_false)
- max_false.reset(new Bound(*other.max_false));
- if (other.min_true)
- min_true.reset(new Bound(*other.min_true));
- if (other.min_reduced)
- min_reduced.reset(new Bound(*other.min_reduced));
- }
-
- /// Returns the max false
- const Bound *getFalse() const { return max_false.get(); }
-
- /// Returns the min true
- const Bound *getTrue() const { return min_true.get(); }
-
- /// Returns the min reduced
- const Bound *getReduced() const { return min_reduced.get(); }
-
- /// Return the stored result for this bound
- ProveResult getResult(const Bound &bound) const;
-
- /// Stores a false found
- void addFalse(const Bound &bound);
-
- /// Stores a true found
- void addTrue(const Bound &bound);
-
- /// Stores a Reduced found
- void addReduced(const Bound &bound);
-
- /// Clears redundant reduced
- /// If a min_true is smaller than a min_reduced then the min_reduced
- /// is unnecessary and then removed. It also works for min_reduced
-    /// being smaller than max_false.
- void clearRedundantReduced();
-
- void clear() {
- max_false.reset();
- min_true.reset();
- min_reduced.reset();
- }
-
- private:
- OwningPtr<Bound> max_false, min_true, min_reduced;
- };
-
- /// This class stores the result found for a node of the graph,
- /// so these results do not need to be recalculated, only searched for.
- class MemoizedResult {
- public:
-    /// Test if there is a true result stored from b to a
-    /// that is less than the bound
- bool hasTrue(Value *b, const Bound &bound) const {
- const Bound *trueBound = map.lookup(b).getTrue();
- return trueBound && Bound::leq(*trueBound, bound);
- }
-
-    /// Test if there is a false result stored from b to a
-    /// that is less than the bound
- bool hasFalse(Value *b, const Bound &bound) const {
- const Bound *falseBound = map.lookup(b).getFalse();
- return falseBound && Bound::leq(*falseBound, bound);
- }
-
-    /// Test if there is a reduced result stored from b to a
-    /// that is less than the bound
- bool hasReduced(Value *b, const Bound &bound) const {
- const Bound *reducedBound = map.lookup(b).getReduced();
- return reducedBound && Bound::leq(*reducedBound, bound);
- }
-
- /// Returns the stored bound for b
- ProveResult getBoundResult(Value *b, const Bound &bound) {
- return map[b].getResult(bound);
- }
-
- /// Clears the map
- void clear() {
- DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
- DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
- for (; begin != end; ++begin) {
- begin->second.clear();
- }
- map.clear();
- }
-
- /// Stores the bound found
- void updateBound(Value *b, const Bound &bound, const ProveResult res);
-
- private:
-    // Maps a node in the graph with its results found.
- DenseMap<Value*, MemoizedResultChart> map;
- };
-
- /// This class represents an edge in the inequality graph used by the
- /// ABCD algorithm. An edge connects node v to node u with a value c if
- /// we could infer a constraint v <= u + c in the source program.
- class Edge {
- public:
- Edge(Value *V, APInt val, bool upper)
- : vertex(V), value(val), upper_bound(upper) {}
-
- Value *getVertex() const { return vertex; }
- const APInt &getValue() const { return value; }
- bool isUpperBound() const { return upper_bound; }
-
- private:
- Value *vertex;
- APInt value;
- bool upper_bound;
- };
-
- /// Weighted and Directed graph to represent constraints.
- /// There is one type of constraint, a <= b + X, which will generate an
- /// edge from b to a with weight X.
- class InequalityGraph {
- public:
-
- /// Adds an edge from V_from to V_to with weight value
- void addEdge(Value *V_from, Value *V_to, APInt value, bool upper);
-
- /// Test if there is a node V
- bool hasNode(Value *V) const { return graph.count(V); }
-
- /// Test if there is any edge from V in the upper direction
- bool hasEdge(Value *V, bool upper) const;
-
- /// Returns all edges pointed by vertex V
- SmallVector<Edge, 16> getEdges(Value *V) const {
- return graph.lookup(V);
- }
-
- /// Prints the graph in dot format.
- /// Blue edges represent upper bound and Red lower bound.
- void printGraph(raw_ostream &OS, Function &F) const {
- printHeader(OS, F);
- printBody(OS);
- printFooter(OS);
- }
-
- /// Clear the graph
- void clear() {
- graph.clear();
- }
-
- private:
- DenseMap<Value *, SmallVector<Edge, 16> > graph;
-
- /// Prints the header of the dot file
- void printHeader(raw_ostream &OS, Function &F) const;
-
- /// Prints the footer of the dot file
- void printFooter(raw_ostream &OS) const {
- OS << "}\n";
- }
-
- /// Prints the body of the dot file
- void printBody(raw_ostream &OS) const;
-
- /// Prints vertex source to the dot file
- void printVertex(raw_ostream &OS, Value *source) const;
-
- /// Prints the edge to the dot file
- void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const;
-
- void printName(raw_ostream &OS, Value *info) const;
- };
-
-  /// Iterates through all BasicBlocks; if the Terminator Instruction
-  /// uses a Comparator Instruction, all operands of this comparator
- /// are sent to be transformed to SSI. Only Instruction operands are
- /// transformed.
- void createSSI(Function &F);
-
- /// Creates the graphs for this function.
- /// It will look for all comparators used in branches, and create them.
- /// These comparators will create constraints for any instruction as an
- /// operand.
- void executeABCD(Function &F);
-
- /// Seeks redundancies in the comparator instruction CI.
- /// If the ABCD algorithm can prove that the comparator CI always
-  /// takes one way, then the Terminator Instruction TI is changed from
-  /// a conditional branch to an unconditional one.
- /// This code basically receives a comparator, and verifies which kind of
- /// instruction it is. Depending on the kind of instruction, we use different
- /// strategies to prove its redundancy.
- void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Substitutes Terminator Instruction TI, that is a conditional branch,
- /// with one unconditional branch. Succ_edge determines if the new
- /// unconditional edge will be the first or second edge of the former TI
- /// instruction.
- void removeRedundancy(TerminatorInst *TI, bool Succ_edge);
-
-  /// When a conditional branch is removed, the BasicBlock that is no longer
-  /// reachable will have problems in phi functions. This method fixes these
-  /// phis by removing the former BasicBlock from the list of incoming
-  /// BasicBlocks of all phis. In case a phi is left with no predecessor, it
-  /// will be marked to be removed later.
- void fixPhi(BasicBlock *BB, BasicBlock *Succ);
-
- /// Removes phis that have no predecessor
- void removePhis();
-
- /// Creates constraints for Instructions.
- /// If the constraint for this instruction has already been created
- /// nothing is done.
- void createConstraintInstruction(Instruction *I);
-
- /// Creates constraints for Binary Operators.
- /// It will create constraints only for addition and subtraction,
- /// the other binary operations are not treated by ABCD.
- /// For additions in the form a = b + X and a = X + b, where X is a constant,
- /// the constraint a <= b + X can be obtained. For this constraint, an edge
- /// a->b with weight X is added to the lower bound graph, and an edge
- /// b->a with weight -X is added to the upper bound graph.
-  /// Only subtractions in the format a = b - X are used by ABCD.
- /// Edges are created using the same semantic as addition.
- void createConstraintBinaryOperator(BinaryOperator *BO);
-
- /// Creates constraints for Comparator Instructions.
- /// Only comparators that have any of the following operators
- /// are used to create constraints: >=, >, <=, <. And only if
- /// at least one operand is an Instruction. In a Comparator Instruction
-  /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f, where
-  /// t and f represent sigmas for operands in the true and false branches. The
- /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
- /// b_f <= b. There are two more constraints that depend on the operator.
- /// For the operator <= : a_t <= b_t and b_f <= a_f-1
- /// For the operator < : a_t <= b_t-1 and b_f <= a_f
- /// For the operator >= : b_t <= a_t and a_f <= b_f-1
- /// For the operator > : b_t <= a_t-1 and a_f <= b_f
- void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Creates constraints for PHI nodes.
- /// In a PHI node a = phi(b,c) we can create the constraint
- /// a<= max(b,c). With this constraint there will be the edges,
- /// b->a and c->a with weight 0 in the lower bound graph, and the edges
- /// a->b and a->c with weight 0 in the upper bound graph.
- void createConstraintPHINode(PHINode *PN);
-
-  /// Given a binary operator, we are only interested in the case
-  /// where one operand is an Instruction and the other is a ConstantInt. In
-  /// this case the method returns true, otherwise false. It also obtains the
-  /// Instruction and ConstantInt from the BinaryOperator and returns them.
- bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2);
-
- /// This method creates a constraint between a Sigma and an Instruction.
-  /// These constraints are created as soon as we find a comparator that uses an
-  /// SSI variable.
- void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f);
-
-  /// If PN_op1 and PN_op2 are different from NULL, create a constraint
- /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
- /// with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value);
-
- /// Returns the sigma representing the Instruction I in BasicBlock BB.
- /// Returns NULL in case there is no sigma for this Instruction in this
-  /// Basic Block. This method assumes that sigmas are the first instructions
-  /// in a block, and that there can be only two sigmas in a block. So it will
-  /// only look at the first two instructions of BasicBlock BB.
- PHINode *findSigma(BasicBlock *BB, Instruction *I);
-
- /// Original ABCD algorithm to prove redundant checks.
- /// This implementation works on any kind of inequality branch.
- bool demandProve(Value *a, Value *b, int c, bool upper_bound);
-
-  /// Prove that the distance between b and a is <= bound
- ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level);
-
- /// Updates the distance value for a and b
- void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level,
- meet_function meet);
-
- InequalityGraph inequality_graph;
- MemoizedResult mem_result;
- DenseMap<Value*, const Bound*> active;
- SmallPtrSet<Value*, 16> created;
- SmallVector<PHINode *, 16> phis_to_remove;
-};
-
-} // end anonymous namespace.
-
-char ABCD::ID = 0;
-INITIALIZE_PASS(ABCD, "abcd",
- "ABCD: Eliminating Array Bounds Checks on Demand",
- false, false);
-
-bool ABCD::runOnFunction(Function &F) {
- modified = false;
- createSSI(F);
- executeABCD(F);
- DEBUG(inequality_graph.printGraph(dbgs(), F));
- removePhis();
-
- inequality_graph.clear();
- mem_result.clear();
- active.clear();
- created.clear();
- phis_to_remove.clear();
- return modified;
-}
-
-/// Iterates through all BasicBlocks; if the Terminator Instruction
-/// uses a Comparator Instruction, all operands of this comparator
-/// are sent to be transformed to SSI. Only Instruction operands are
-/// transformed.
-void ABCD::createSSI(Function &F) {
- SSI *ssi = &getAnalysis<SSI>();
-
- SmallVector<Instruction *, 16> Insts;
-
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) {
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) {
- modified = true; // XXX: but yet createSSI might do nothing
- Insts.push_back(I);
- }
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) {
- modified = true;
- Insts.push_back(I);
- }
- }
- }
- ssi->createSSI(Insts);
-}
-
-/// Creates the graphs for this function.
-/// It will look for all comparators used in branches, and create them.
-/// These comparators will create constraints for any instruction as an
-/// operand.
-void ABCD::executeABCD(Function &F) {
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
- if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy())
- continue;
-
- createConstraintCmpInst(ICI, TI);
- seekRedundancy(ICI, TI);
- }
-}
-
-/// Seeks redundancies in the comparator instruction CI.
-/// If the ABCD algorithm can prove that the comparator CI always
-/// takes one way, then the Terminator Instruction TI is changed from
-/// a conditional branch to an unconditional one.
-/// This code basically receives a comparator, and verifies which kind of
-/// instruction it is. Depending on the kind of instruction, we use different
-/// strategies to prove its redundancy.
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) {
- CmpInst::Predicate Pred = ICI->getPredicate();
-
- Value *source, *dest;
- int distance1, distance2;
- bool upper;
-
- switch(Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- upper = false;
- distance1 = 1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- upper = false;
- distance1 = 0;
- distance2 = -1;
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- upper = true;
- distance1 = -1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- upper = true;
- distance1 = 0;
- distance2 = 1;
- break;
-
- default:
- return;
- }
-
- ++NumBranchTested;
- source = ICI->getOperand(0);
- dest = ICI->getOperand(1);
- if (demandProve(dest, source, distance1, upper)) {
- removeRedundancy(TI, true);
- } else if (demandProve(dest, source, distance2, !upper)) {
- removeRedundancy(TI, false);
- }
-}
-
-/// Substitutes Terminator Instruction TI, that is a conditional branch,
-/// with one unconditional branch. Succ_edge determines if the new
-/// unconditional edge will be the first or second edge of the former TI
-/// instruction.
-void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) {
- BasicBlock *Succ;
- if (Succ_edge) {
- Succ = TI->getSuccessor(0);
- fixPhi(TI->getParent(), TI->getSuccessor(1));
- } else {
- Succ = TI->getSuccessor(1);
- fixPhi(TI->getParent(), TI->getSuccessor(0));
- }
-
- BranchInst::Create(Succ, TI);
- TI->eraseFromParent(); // XXX: invoke
- ++NumBranchRemoved;
- modified = true;
-}
-
-/// When a conditional branch is removed, the BasicBlock that is no longer
-/// reachable will have problems in phi functions. This method fixes these
-/// phis by removing the former BasicBlock from the list of incoming
-/// BasicBlocks of all phis. In case a phi is left with no predecessor, it
-/// will be marked to be removed later.
-void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) {
- BasicBlock::iterator begin = Succ->begin();
- while (PHINode *PN = dyn_cast<PHINode>(begin++)) {
- PN->removeIncomingValue(BB, false);
- if (PN->getNumIncomingValues() == 0)
- phis_to_remove.push_back(PN);
- }
-}
-
-/// Removes phis that have no predecessor
-void ABCD::removePhis() {
- for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) {
- PHINode *PN = phis_to_remove[i];
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- PN->eraseFromParent();
- }
-}
-
-/// Creates constraints for Instructions.
-/// If the constraint for this instruction has already been created
-/// nothing is done.
-void ABCD::createConstraintInstruction(Instruction *I) {
- // Test if this instruction has not been created before
- if (created.insert(I)) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- createConstraintBinaryOperator(BO);
- } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
- createConstraintPHINode(PN);
- }
- }
-}
-
-/// Creates constraints for Binary Operators.
-/// It will create constraints only for addition and subtraction,
-/// the other binary operations are not treated by ABCD.
-/// For additions in the form a = b + X and a = X + b, where X is a constant,
-/// the constraint a <= b + X can be obtained. For this constraint, an edge
-/// a->b with weight X is added to the lower bound graph, and an edge
-/// b->a with weight -X is added to the upper bound graph.
-/// Only subtractions in the format a = b - X are used by ABCD.
-/// Edges are created using the same semantic as addition.
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) {
- Instruction *I1 = NULL, *I2 = NULL;
- ConstantInt *CI1 = NULL, *CI2 = NULL;
-
- // Test if an operand is an Instruction and the other is a Constant
- if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2))
- return;
-
- Instruction *I = 0;
- APInt value;
-
- switch (BO->getOpcode()) {
- case Instruction::Add:
- if (I1) {
- I = I1;
- value = CI2->getValue();
- } else if (I2) {
- I = I2;
- value = CI1->getValue();
- }
- break;
-
- case Instruction::Sub:
-    // Instructions like a = X-b, where X is a constant, are not represented
- // in the graph.
- if (!I1)
- return;
-
- I = I1;
- value = -CI2->getValue();
- break;
-
- default:
- return;
- }
-
- inequality_graph.addEdge(I, BO, value, true);
- inequality_graph.addEdge(BO, I, -value, false);
- createConstraintInstruction(I);
-}
-
-/// Given a binary operator, we are only interested in the case
-/// where one operand is an Instruction and the other is a ConstantInt. In
-/// this case the method returns true, otherwise false. It also obtains the
-/// Instruction and ConstantInt from the BinaryOperator and returns them.
-bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2) {
- Value *op1 = BO->getOperand(0);
- Value *op2 = BO->getOperand(1);
-
- if ((*I1 = dyn_cast<Instruction>(op1))) {
- if ((*C2 = dyn_cast<ConstantInt>(op2)))
- return true; // First is Instruction and second ConstantInt
-
- return false; // Second operand is not a ConstantInt
- } else {
- if ((*C1 = dyn_cast<ConstantInt>(op1)) &&
- (*I2 = dyn_cast<Instruction>(op2)))
- return true; // First is ConstantInt and second Instruction
-
- return false; // Not an Instruction/ConstantInt pair
- }
-}
-
-/// Creates constraints for Comparator Instructions.
-/// Only comparators using one of the operators >=, >, <= or < are used
-/// to create constraints, and only if at least one operand is an
-/// Instruction. For a comparison a op b there are four sigma functions,
-/// a_t, a_f, b_t and b_f, where t and f denote the sigmas of the operands
-/// on the true and false branches. The following constraints always hold:
-/// a_t <= a, a_f <= a, b_t <= b and b_f <= b. Two more constraints depend
-/// on the operator:
-/// For the operator <= : a_t <= b_t and b_f <= a_f-1
-/// For the operator < : a_t <= b_t-1 and b_f <= a_f
-/// For the operator >= : b_t <= a_t and a_f <= b_f-1
-/// For the operator > : b_t <= a_t-1 and a_f <= b_f
-void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
- Value *V_op1 = ICI->getOperand(0);
- Value *V_op2 = ICI->getOperand(1);
-
- if (!V_op1->getType()->isIntegerTy())
- return;
-
- Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
- Instruction *I_op2 = dyn_cast<Instruction>(V_op2);
-
- // Test if at least one operand is an Instruction
- if (!I_op1 && !I_op2)
- return;
-
- BasicBlock *BB_succ_t = TI->getSuccessor(0);
- BasicBlock *BB_succ_f = TI->getSuccessor(1);
-
- PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL,
- *SIG_op2_t = NULL, *SIG_op2_f = NULL;
-
- createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f);
- createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f);
-
- int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth();
- APInt MinusOne = APInt::getAllOnesValue(width);
- APInt Zero = APInt::getNullValue(width);
-
- CmpInst::Predicate Pred = ICI->getPredicate();
- ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
- ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
- switch (Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
- break;
-
- default:
- break;
- }
-
- if (I_op1)
- createConstraintInstruction(I_op1);
- if (I_op2)
- createConstraintInstruction(I_op2);
-}
-
-/// Creates constraints for PHI nodes.
-/// For a PHI node a = phi(b,c) we can create the constraint
-/// a <= max(b,c). This constraint yields the edges b->a and c->a with
-/// weight 0 in the lower bound graph, and the edges a->b and a->c with
-/// weight 0 in the upper bound graph.
-void ABCD::createConstraintPHINode(PHINode *PN) {
- // FIXME: We really want to disallow sigma nodes, but I don't know a
- // better way to detect them than this.
- if (PN->getNumOperands() == 2) return;
-
- int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = PN->getIncomingValue(i);
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- createConstraintInstruction(I);
- }
- inequality_graph.addEdge(V, PN, APInt(width, 0), true);
- inequality_graph.addEdge(V, PN, APInt(width, 0), false);
- }
-}
-
-/// This method creates a constraint between a Sigma and an Instruction.
-/// These constraints are created as soon as we find a comparator that
-/// uses an SSI variable.
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f) {
- *SIG_op_t = findSigma(BB_succ_t, I_op);
- *SIG_op_f = findSigma(BB_succ_f, I_op);
-
- if (*SIG_op_t) {
- int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- }
- if (*SIG_op_f) {
- int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- }
-}
-
-/// If SIG_op1 and SIG_op2 are both non-NULL, create a constraint
-/// SIG_op2 -> SIG_op1 with the given value. If either of them is NULL,
-/// substitute the corresponding V_op#, provided V_op# is a ConstantInt.
-void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value) {
- if (SIG_op1 && SIG_op2) {
- inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
- } else if (SIG_op1 && V_op2) {
- inequality_graph.addEdge(V_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
- } else if (SIG_op2 && V_op1) {
- inequality_graph.addEdge(SIG_op2, V_op1, value, true);
- inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
- }
-}
-
-/// Returns the sigma representing the Instruction I in BasicBlock BB.
-/// Returns NULL if there is no sigma for this Instruction in this
-/// BasicBlock. This method assumes that sigmas are the first instructions
-/// in a block, and that a block holds at most two sigmas, so it only
-/// looks at the first two instructions of BasicBlock BB.
-PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) {
- // If BB has more than one predecessor, it cannot have sigmas.
- if (I == NULL || BB->getSinglePredecessor() == NULL)
- return NULL;
-
- BasicBlock::iterator begin = BB->begin();
- BasicBlock::iterator end = BB->end();
-
- for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) {
- Instruction *I_succ = begin;
- if (PHINode *PN = dyn_cast<PHINode>(I_succ))
- if (PN->getIncomingValue(0) == I)
- return PN;
- }
-
- return NULL;
-}
-
-/// Original ABCD algorithm to prove redundant checks.
-/// This implementation works on any kind of inequality branch.
-bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
- int32_t width = cast<IntegerType>(a->getType())->getBitWidth();
- Bound bound(APInt(width, c), upper_bound);
-
- mem_result.clear();
- active.clear();
-
- ProveResult res = prove(a, b, bound, 0);
- return res != False;
-}
-
-/// Prove that the distance between b and a is <= bound
-ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound,
- unsigned level) {
- // If C[b-a<=e] == True for some e <= bound, then the same or a
- // stronger difference was already proven.
- if (mem_result.hasTrue(b, bound))
- return True;
-
- // If C[b-a<=e] == False for some e >= bound, then the same or a
- // weaker difference was already disproved.
- if (mem_result.hasFalse(b, bound))
- return False;
-
- // If C[b-a<=e] == Reduced for some e <= bound, then b is on a cycle
- // that was reduced for the same or a stronger difference.
- if (mem_result.hasReduced(b, bound))
- return Reduced;
-
- // traversal reached the source vertex
- if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true)))
- return True;
-
- // if b has no predecessor then fail
- if (!inequality_graph.hasEdge(b, bound.isUpperBound()))
- return False;
-
- // a cycle was encountered
- if (active.count(b)) {
- if (Bound::leq(*active.lookup(b), bound))
- return Reduced; // a "harmless" cycle
-
- return False; // an amplifying cycle
- }
-
- active[b] = &bound;
- PHINode *PN = dyn_cast<PHINode>(b);
-
- // Test if the Value is a phi. A PHINode with more than one incoming
- // value is a phi; one with exactly one incoming value is a sigma.
- if (PN && PN->getNumIncomingValues() > 1)
- updateMemDistance(a, b, bound, level, min);
- else
- updateMemDistance(a, b, bound, level, max);
-
- active.erase(b);
-
- ABCD::ProveResult res = mem_result.getBoundResult(b, bound);
- return res;
-}
-
-/// Updates the distance value for a and b
-void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound,
- unsigned level, meet_function meet) {
- ABCD::ProveResult res = (meet == max) ? False : True;
-
- SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b);
- SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end();
-
- for (; begin != end ; ++begin) {
- if (((res >= Reduced) && (meet == max)) ||
- ((res == False) && (meet == min))) {
- break;
- }
- const Edge &in = *begin;
- if (in.isUpperBound() == bound.isUpperBound()) {
- Value *succ = in.getVertex();
- res = meet(res, prove(a, succ, Bound(bound, in.getValue()),
- level+1));
- }
- }
-
- mem_result.updateBound(b, bound, res);
-}
-
-/// Return the stored result for this bound
-ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{
- if (max_false && Bound::leq(bound, *max_false))
- return False;
- if (min_true && Bound::leq(*min_true, bound))
- return True;
- if (min_reduced && Bound::leq(*min_reduced, bound))
- return Reduced;
- return False;
-}
-
-/// Stores a False result that was found
-void ABCD::MemoizedResultChart::addFalse(const Bound &bound) {
- if (!max_false || Bound::leq(*max_false, bound))
- max_false.reset(new Bound(bound));
-
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced)));
- if (Bound::eq(max_false.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a True result that was found
-void ABCD::MemoizedResultChart::addTrue(const Bound &bound) {
- if (!min_true || Bound::leq(bound, *min_true))
- min_true.reset(new Bound(bound));
-
- if (Bound::eq(min_true.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced)));
- if (Bound::eq(min_true.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a Reduced result that was found
-void ABCD::MemoizedResultChart::addReduced(const Bound &bound) {
- if (!min_reduced || Bound::leq(bound, *min_reduced))
- min_reduced.reset(new Bound(bound));
-
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
-}
-
-/// Clears a redundant min_reduced.
-/// If min_true is smaller than min_reduced, then min_reduced is
-/// unnecessary and is removed. The same applies when min_reduced is
-/// smaller than max_false.
-void ABCD::MemoizedResultChart::clearRedundantReduced() {
- if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced))
- min_reduced.reset();
- if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false))
- min_reduced.reset();
-}
-
-/// Stores the result found for this bound
-void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound,
- const ProveResult res) {
- if (res == False) {
- map[b].addFalse(bound);
- } else if (res == True) {
- map[b].addTrue(bound);
- } else {
- map[b].addReduced(bound);
- }
-}
-
-/// Adds an edge from V_from to V_to with weight value
-void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from,
- APInt value, bool upper) {
- assert(V_from->getType() == V_to->getType());
- assert(cast<IntegerType>(V_from->getType())->getBitWidth() ==
- value.getBitWidth());
-
- graph[V_from].push_back(Edge(V_to, value, upper));
-}
-
-/// Tests whether there is any edge from V in the given bound direction
-bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const {
- SmallVector<Edge, 16> it = graph.lookup(V);
-
- SmallVector<Edge, 16>::iterator begin = it.begin();
- SmallVector<Edge, 16>::iterator end = it.end();
- for (; begin != end; ++begin) {
- if (begin->isUpperBound() == upper) {
- return true;
- }
- }
- return false;
-}
-
-/// Prints the header of the dot file
-void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
- OS << "digraph dotgraph {\n";
- OS << "label=\"Inequality Graph for \'";
- OS << F.getNameStr() << "\' function\";\n";
- OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n";
-}
-
-/// Prints the body of the dot file
-void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin =
- graph.begin(), end = graph.end();
-
- for (; begin != end ; ++begin) {
- SmallVector<Edge, 16>::const_iterator begin_par =
- begin->second.begin(), end_par = begin->second.end();
- Value *source = begin->first;
-
- printVertex(OS, source);
-
- for (; begin_par != end_par ; ++begin_par) {
- const Edge &edge = *begin_par;
- printEdge(OS, source, edge);
- }
- }
-}
-
-/// Prints vertex source to the dot file
-///
-void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const {
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " [label=\"{";
- printName(OS, source);
- OS << "}\"];\n";
-}
-
-/// Prints the edge to the dot file
-void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source,
- const Edge &edge) const {
- Value *dest = edge.getVertex();
- APInt value = edge.getValue();
- bool upper = edge.isUpperBound();
-
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " -> ";
- OS << "\"";
- printName(OS, dest);
- OS << "\"";
- OS << " [label=\"" << value << "\"";
- if (upper) {
- OS << "color=\"blue\"";
- } else {
- OS << "color=\"red\"";
- }
- OS << "];\n";
-}
-
-void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) {
- OS << *CI;
- } else {
- if (!info->hasName()) {
- info->setName("V");
- }
- OS << info->getNameStr();
- }
-}
-
-/// createABCDPass - The public interface to this file...
-FunctionPass *llvm::createABCDPass() {
- return new ABCD();
-}
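For readers skimming the deletion above: the ABCD pass built two weighted constraint graphs over SSA values and answered redundancy queries with a memoized depth-first proof. A minimal standalone sketch of the graph-building idea, in plain C++ with illustrative names (this is not the pass itself, and it uses no LLVM types):

#include <cstdint>
#include <map>
#include <vector>

// Vertices stand in for SSA values. An edge (From -> To, Weight, Upper)
// encodes "To <= From + Weight" in the upper-bound graph; the lower-bound
// graph holds the mirrored edges.
struct Edge {
  int To; int64_t Weight; bool Upper;
  Edge(int T, int64_t W, bool U) : To(T), Weight(W), Upper(U) {}
};

struct InequalityGraph {
  std::map<int, std::vector<Edge> > Graph;

  void addEdge(int From, int To, int64_t Weight, bool Upper) {
    Graph[From].push_back(Edge(To, Weight, Upper));
  }

  // For a = b + X (X a constant), as in createConstraintBinaryOperator():
  // a->b with weight X in the lower-bound graph, and b->a with weight -X
  // in the upper-bound graph.
  void addAddition(int A, int B, int64_t X) {
    addEdge(A, B, X, /*Upper=*/false);
    addEdge(B, A, -X, /*Upper=*/true);
  }
};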
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ADCE.cpp Tue Oct 26 19:48:03 2010
@@ -33,7 +33,9 @@
namespace {
struct ADCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(&ID) {}
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function& F);
@@ -45,7 +47,7 @@
}
char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
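The ADCE hunk above is the first of many identical constructor changes in this commit: passes now pass their ID by value rather than by address, and explicitly register themselves with the PassRegistry. Abstracted into a sketch (MyPass and initializeMyPassPass are placeholder names; the initializer is assumed to be generated by the INITIALIZE_PASS machinery):

namespace {
  struct MyPass : public FunctionPass {
    static char ID; // Pass identification, replacement for typeid
    MyPass() : FunctionPass(ID) { // was: FunctionPass(&ID)
      // Register eagerly instead of relying on static-initializer order.
      initializeMyPassPass(*PassRegistry::getPassRegistry());
    }
  };
}
char MyPass::ID = 0;
INITIALIZE_PASS(MyPass, "my-pass", "My Example Pass", false, false)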
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp Tue Oct 26 19:48:03 2010
@@ -41,7 +41,9 @@
namespace {
struct BlockPlacement : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(&ID) {}
+ BlockPlacement() : FunctionPass(ID) {
+ initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -74,8 +76,11 @@
}
char BlockPlacement::ID = 0;
-INITIALIZE_PASS(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false);
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
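Note also the semicolon dropped from INITIALIZE_PASS, and the new three-part form used when a pass declares dependencies, as in the hunk above. Schematically (pass and dependency names are placeholders drawn from this diff):

INITIALIZE_PASS_BEGIN(MyPass, "my-pass", "My Example Pass", false, false)
INITIALIZE_AG_DEPENDENCY(ProfileInfo)       // analysis-group dependency
INITIALIZE_PASS_DEPENDENCY(DominatorTree)   // ordinary pass dependency
INITIALIZE_PASS_END(MyPass, "my-pass", "My Example Pass", false, false)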
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -1,9 +1,9 @@
add_llvm_library(LLVMScalarOpts
- ABCD.cpp
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
ConstantProp.cpp
+ CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
GEPSplitter.cpp
@@ -12,11 +12,11 @@
JumpThreading.cpp
LICM.cpp
LoopDeletion.cpp
- LoopIndexSplit.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
+ LowerAtomic.cpp
MemCpyOptimizer.cpp
Reassociate.cpp
Reg2Mem.cpp
@@ -30,5 +30,3 @@
TailDuplication.cpp
TailRecursionElimination.cpp
)
-
-target_link_libraries (LLVMScalarOpts LLVMTransformUtils)
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/CodeGenPrepare.cpp Tue Oct 26 19:48:03 2010
@@ -31,8 +31,10 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -41,6 +43,13 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+STATISTIC(NumElim, "Number of blocks eliminated");
+
+static cl::opt<bool>
+CriticalEdgeSplit("cgp-critical-edge-splitting",
+ cl::desc("Split critical edges during codegen prepare"),
+ cl::init(false), cl::Hidden);
+
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -54,7 +63,9 @@
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(&ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -83,7 +94,7 @@
char CodeGenPrepare::ID = 0;
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false);
+ "Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
@@ -296,6 +307,7 @@
PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
}
BB->eraseFromParent();
+ ++NumElim;
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
@@ -427,9 +439,9 @@
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
- if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
@@ -730,43 +742,21 @@
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs) {
bool MadeChange = false;
- InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
-
- // Do a prepass over the constraints, canonicalizing them, and building up the
- // ConstraintOperands list.
- std::vector<InlineAsm::ConstraintInfo>
- ConstraintInfos = IA->ParseConstraints();
-
- /// ConstraintOperands - Information about all of the constraints.
- std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
- unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.
- push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
- TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
-
- // Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
- }
+ std::vector<TargetLowering::AsmOperandInfo> TargetConstraints = TLI->ParseConstraints(CS);
+ unsigned ArgNo = 0;
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
- Value *OpVal = OpInfo.CallOperandVal;
+ Value *OpVal = const_cast<Value *>(CS.getArgument(ArgNo++));
MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
- }
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
}
return MadeChange;
@@ -788,7 +778,9 @@
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
if (!LI->hasOneUse() &&
- TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+ TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+ !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType()))
return false;
// Check whether the target supports casts folded into loads.
@@ -812,7 +804,7 @@
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
- // If both result of the {s|z}xt and its source are live out, rewrite all
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
// other uses of the source with result of extension.
Value *Src = I->getOperand(0);
if (Src->hasOneUse())
@@ -891,12 +883,14 @@
bool MadeChange = false;
// Split all critical edges where the dest block has a PHI.
- TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *SuccBB = BBTI->getSuccessor(i);
- if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, BackEdges, this);
+ if (CriticalEdgeSplit) {
+ TerminatorInst *BBTI = BB.getTerminator();
+ if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
}
}
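The CodeGenPrepare changes gate critical-edge splitting behind a hidden flag and count eliminated blocks. Pulled out of the hunks above, the idiom looks like this (a sketch; the DEBUG_TYPE define is assumed to sit at the top of the file, since STATISTIC expands it):

#define DEBUG_TYPE "codegenprepare" // assumed; STATISTIC requires it
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"

STATISTIC(NumElim, "Number of blocks eliminated");

static cl::opt<bool>
CriticalEdgeSplit("cgp-critical-edge-splitting",
                  cl::desc("Split critical edges during codegen prepare"),
                  cl::init(false), cl::Hidden);

// Later: the splitting loop runs only under 'if (CriticalEdgeSplit)',
// and '++NumElim' is bumped wherever a block is erased.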
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ConstantProp.cpp Tue Oct 26 19:48:03 2010
@@ -34,7 +34,9 @@
namespace {
struct ConstantPropagation : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ConstantPropagation() : FunctionPass(&ID) {}
+ ConstantPropagation() : FunctionPass(ID) {
+ initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -46,7 +48,7 @@
char ConstantPropagation::ID = 0;
INITIALIZE_PASS(ConstantPropagation, "constprop",
- "Simple constant propagation", false, false);
+ "Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/DCE.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
//
struct DeadInstElimination : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : BasicBlockPass(&ID) {}
+ DeadInstElimination() : BasicBlockPass(ID) {
+ initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -57,7 +59,7 @@
char DeadInstElimination::ID = 0;
INITIALIZE_PASS(DeadInstElimination, "die",
- "Dead Instruction Elimination", false, false);
+ "Dead Instruction Elimination", false, false)
Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
@@ -70,7 +72,9 @@
//
struct DCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- DCE() : FunctionPass(&ID) {}
+ DCE() : FunctionPass(ID) {
+ initializeDCEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -81,7 +85,7 @@
}
char DCE::ID = 0;
-INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
// Start out with all of the instructions in the worklist...
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/DeadStoreElimination.cpp Tue Oct 26 19:48:03 2010
@@ -40,7 +40,9 @@
TargetData *TD;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(&ID) {}
+ DSE() : FunctionPass(ID) {
+ initializeDSEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F) {
bool Changed = false;
@@ -77,12 +79,16 @@
AU.addPreserved<MemoryDependenceAnalysis>();
}
- unsigned getPointerSize(Value *V) const;
+ uint64_t getPointerSize(Value *V) const;
};
}
char DSE::ID = 0;
-INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
@@ -136,11 +142,11 @@
}
/// getStoreSize - Return the length in bytes of the write by the clobbering
-/// instruction. If variable or unknown, returns -1.
-static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
+/// instruction. If variable or unknown, returns AliasAnalysis::UnknownSize.
+static uint64_t getStoreSize(Instruction *I, const TargetData *TD) {
assert(doesClobberMemory(I));
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!TD) return -1u;
+ if (!TD) return AliasAnalysis::UnknownSize;
return TD->getTypeStoreSize(SI->getOperand(0)->getType());
}
@@ -152,7 +158,7 @@
switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
- return -1u;
+ return AliasAnalysis::UnknownSize;
case Intrinsic::lifetime_end:
Len = II->getArgOperand(0);
break;
@@ -161,7 +167,7 @@
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
if (!LenCI->isAllOnesValue())
return LenCI->getZExtValue();
- return -1u;
+ return AliasAnalysis::UnknownSize;
}
/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
@@ -176,10 +182,12 @@
// Exactly the same type, must have exactly the same size.
if (I1Ty == I2Ty) return true;
- int I1Size = getStoreSize(I1, TD);
- int I2Size = getStoreSize(I2, TD);
+ uint64_t I1Size = getStoreSize(I1, TD);
+ uint64_t I2Size = getStoreSize(I2, TD);
- return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
+ return I1Size != AliasAnalysis::UnknownSize &&
+ I2Size != AliasAnalysis::UnknownSize &&
+ I1Size >= I2Size;
}
bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -367,7 +375,7 @@
}
Value *killPointer = 0;
- uint64_t killPointerSize = ~0UL;
+ uint64_t killPointerSize = AliasAnalysis::UnknownSize;
// If we encounter a use of the pointer, it is no longer considered dead
if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
@@ -559,7 +567,7 @@
} while (!NowDeadInsts.empty());
}
-unsigned DSE::getPointerSize(Value *V) const {
+uint64_t DSE::getPointerSize(Value *V) const {
if (TD) {
if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
// Get size information for the alloca
@@ -571,5 +579,5 @@
return TD->getTypeAllocSize(PT->getElementType());
}
}
- return ~0U;
+ return AliasAnalysis::UnknownSize;
}
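The recurring theme in the DSE hunks is replacing mixed ~0U / -1u sentinels with one named 64-bit constant. A standalone illustration of why that matters once sizes widen to uint64_t (plain C++; UnknownSize here is a stand-in for AliasAnalysis::UnknownSize):

#include <stdint.h>

static const uint64_t UnknownSize = ~(uint64_t)0; // stand-in sentinel

bool atLeastAsWide(uint64_t Size1, uint64_t Size2) {
  // With a single 64-bit sentinel these comparisons are reliable. The old
  // code mixed -1u (0xFFFFFFFF) and ~0UL; once the values are 64-bit, a
  // 32-bit sentinel no longer matches and "unknown" sizes slip through.
  return Size1 != UnknownSize && Size2 != UnknownSize && Size1 >= Size2;
}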
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/GEPSplitter.cpp Tue Oct 26 19:48:03 2010
@@ -27,13 +27,15 @@
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
public:
static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(&ID) {}
+ explicit GEPSplitter() : FunctionPass(ID) {
+ initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
+ }
};
}
char GEPSplitter::ID = 0;
INITIALIZE_PASS(GEPSplitter, "split-geps",
- "split complex GEPs into simple GEPs", false, false);
+ "split complex GEPs into simple GEPs", false, false)
FunctionPass *llvm::createGEPSplitterPass() {
return new GEPSplitter();
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/GVN.cpp Tue Oct 26 19:48:03 2010
@@ -61,7 +61,6 @@
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
-static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -140,9 +139,9 @@
}
}
- bool operator!=(const Expression &other) const {
+ /*bool operator!=(const Expression &other) const {
return !(*this == other);
- }
+ }*/
};
class ValueTable {
@@ -165,7 +164,6 @@
Expression create_expression(CastInst* C);
Expression create_expression(GetElementPtrInst* G);
Expression create_expression(CallInst* C);
- Expression create_expression(Constant* C);
Expression create_expression(ExtractValueInst* C);
Expression create_expression(InsertValueInst* C);
@@ -177,7 +175,6 @@
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
- unsigned size();
void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
AliasAnalysis *getAliasAnalysis() const { return AA; }
void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
@@ -665,7 +662,9 @@
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
private:
bool NoLoads;
@@ -716,7 +715,11 @@
return new GVN(NoLoads);
}
-INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
@@ -1311,7 +1314,7 @@
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
- SSAUpdate.Initialize(LI);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
const Type *LoadTy = LI->getType();
@@ -1534,26 +1537,19 @@
return false;
if (Blockers.count(TmpBB))
return false;
+
+ // If any of these blocks has more than one successor (i.e. if the edge we
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
+ // above this block would be adding the load to execution paths along
+ // which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
- allSingleSucc = false;
+ return false;
}
assert(TmpBB);
LoadBB = TmpBB;
- // If we have a repl set with LI itself in it, this means we have a loop where
- // at least one of the values is LI. Since this means that we won't be able
- // to eliminate LI even if we insert uses in the other predecessors, we will
- // end up increasing code size. Reject this by scanning for LI.
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- if (ValuesPerBlock[i].isSimpleValue() &&
- ValuesPerBlock[i].getSimpleValue() == LI) {
- // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
- if (!EnableFullLoadPRE || e == 1)
- return false;
- }
- }
-
// FIXME: It is extremely unclear what this loop is doing, other than
// artificially restricting loadpre.
if (isSinglePred) {
@@ -1613,14 +1609,13 @@
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should be eliminated above!");
- if (!EnableFullLoadPRE) {
- // If this load is unavailable in multiple predecessors, reject it.
- // FIXME: If we could restructure the CFG, we could make a common pred with
- // all the preds that don't have an available LI and insert a new load into
- // that one block.
- if (NumUnavailablePreds != 1)
+
+ // If this load is unavailable in multiple predecessors, reject it.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ if (NumUnavailablePreds != 1)
return false;
- }
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
@@ -2111,6 +2106,11 @@
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
+
+ // We don't currently value number ANY inline asm calls.
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
uint32_t ValNo = VN.lookup(CurInst);
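One API change threaded through GVN (and JumpThreading below) is SSAUpdater::Initialize, which now takes the result type and a name hint instead of an exemplar instruction. The usage shape, as a fragment (not self-contained; it assumes an LLVM context where blocks BB1 and BB2 define values V1 and V2 of the load's type):

SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName()); // was: Initialize(LI)
SSAUpdate.AddAvailableValue(BB1, V1);
SSAUpdate.AddAvailableValue(BB2, V2);
// Materialize the live value (inserting PHIs as needed) at a use point.
Value *Live = SSAUpdate.GetValueInMiddleOfBlock(UseBB);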
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/IndVarSimplify.cpp Tue Oct 26 19:48:03 2010
@@ -77,7 +77,9 @@
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(&ID) {}
+ IndVarSimplify() : LoopPass(ID) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -117,8 +119,16 @@
}
char IndVarSimplify::ID = 0;
-INITIALIZE_PASS(IndVarSimplify, "indvars",
- "Canonicalize Induction Variables", false, false);
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false)
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
@@ -346,7 +356,7 @@
PHIs.push_back(PN);
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
HandleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
@@ -395,7 +405,7 @@
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
@@ -462,7 +472,7 @@
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
@@ -725,7 +735,7 @@
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
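The &* added in the IndVarSimplify hunks forces a value handle (WeakVH) back to a raw pointer before the cast, since the handle no longer converts implicitly. In miniature, with a toy handle type (not LLVM's):

#include <cassert>

template <typename T>
struct ToyHandle {            // stands in for llvm::WeakVH
  T *Ptr;
  T &operator*() const { return *Ptr; }
};

int main() {
  int X = 42;
  ToyHandle<int> H = { &X };
  int *Raw = &*H;             // the pattern added above: &*Handle
  assert(Raw == &X);
  return 0;
}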
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/JumpThreading.cpp Tue Oct 26 19:48:03 2010
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -43,12 +44,6 @@
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
-// Turn on use of LazyValueInfo.
-static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
-
-
-
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
@@ -74,15 +69,32 @@
#else
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(&ID) {}
+ JumpThreading() : FunctionPass(ID) {
+ initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (EnableLVI)
- AU.addRequired<LazyValueInfo>();
+ AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
}
void FindLoopHeaders(Function &F);
@@ -111,8 +123,11 @@
}
char JumpThreading::ID = 0;
-INITIALIZE_PASS(JumpThreading, "jump-threading",
- "Jump Threading", false, false);
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -122,7 +137,7 @@
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
- LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
+ LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
@@ -144,6 +159,7 @@
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
+ LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -164,6 +180,11 @@
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
BasicBlock *Succ = BI->getSuccessor(0);
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
// If we deleted BB and BB was the header of a loop, then the
@@ -251,6 +272,17 @@
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
+// Helper method for ComputeValueKnownInPredecessors. If Value is a
+// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > &Result,
+ Constant *Value, BasicBlock* BB){
+ if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
+ Result.push_back(std::make_pair(FoldedCInt, BB));
+ else if (isa<UndefValue>(Value))
+ Result.push_back(std::make_pair((ConstantInt*)0, BB));
+}
+
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt in any of our
/// predecessors. If so, return the known list of value and pred BB in the
@@ -260,12 +292,24 @@
///
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them.
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII helper to remove this pair from the recursion set once the
+ // recursion stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
// If V is a constantint, then it is known in all predecessors.
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
ConstantInt *CI = dyn_cast<ConstantInt>(V);
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(CI, *PI));
+
return true;
}
@@ -281,29 +325,25 @@
/// TODO: Per PR2563, we could infer value range information about a
/// predecessor based on its terminator.
//
- if (LVI) {
- // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
- // "I" is a non-local compare-with-a-constant instruction. This would be
- // able to handle value inequalities better, for example if the compare is
- // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
- // Perhaps getConstantOnEdge should be smart enough to do this?
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
- if (PredCst == 0 ||
- (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
- continue;
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ if (PredCst == 0 ||
+ (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+ continue;
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
- }
-
- return !Result.empty();
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
}
-
- return false;
+
+ return !Result.empty();
}
/// If I is a PHI node, then we know the incoming values for any constants.
@@ -313,8 +353,15 @@
if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ } else {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ // LVI returns null if no value could be determined.
+ if (!CI) continue;
+ PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
}
}
+
return !Result.empty();
}
@@ -338,29 +385,26 @@
else
InterestingVal = ConstantInt::getFalse(I->getContext());
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
Result.push_back(LHSVals[i]);
Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- bool HasValue = false;
- for (unsigned r = 0, e = Result.size(); r != e; ++r)
- if (Result[r].second == RHSVals[i].second) {
- HasValue = true;
- break;
- }
-
- if (!HasValue) {
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
Result.push_back(RHSVals[i]);
Result.back().first = InterestingVal;
}
}
+
return !Result.empty();
}
@@ -377,8 +421,27 @@
if (Result[i].first)
Result[i].first =
cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+
return true;
}
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ if (V == 0) V = UndefValue::get(BO->getType());
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+ }
+
+ return !Result.empty();
}
// Handle compare with phi operand, where the PHI is defined in this block.
@@ -394,7 +457,7 @@
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
if (Res == 0) {
- if (!LVI || !isa<Constant>(RHS))
+ if (!isa<Constant>(RHS))
continue;
LazyValueInfo::Tristate
@@ -405,10 +468,8 @@
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
- if (isa<UndefValue>(Res))
- Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
- Result.push_back(std::make_pair(CI, PredBB));
+ if (Constant *ConstRes = dyn_cast<Constant>(Res))
+ PushConstantIntOrUndef(Result, ConstRes, PredBB);
}
return !Result.empty();
@@ -417,30 +478,56 @@
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
- if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isIntegerTy() && // Not vector compare.
- (!isa<Instruction>(Cmp->getOperand(0)) ||
- cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
- Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- LazyValueInfo::Tristate
- Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, P, BB);
- if (Res == LazyValueInfo::Unknown)
- continue;
+ if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
- Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
- }
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ }
- return !Result.empty();
+ return !Result.empty();
+ }
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first;
+ if (V == 0) V = UndefValue::get(CmpConst->getType());
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+
+ return !Result.empty();
+ }
}
}
- return false;
+
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
+ if (CInt) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CInt, *PI));
+ }
+
+ return !Result.empty();
}
@@ -490,6 +577,7 @@
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
@@ -530,7 +618,7 @@
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
- RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
+ BBTerm->getSuccessor(i)->removePredecessor(BB, true);
}
DEBUG(dbgs() << " In block '" << BB->getName()
@@ -542,65 +630,50 @@
Instruction *CondInst = dyn_cast<Instruction>(Condition);
- // If the condition is an instruction defined in another block, see if a
- // predecessor has the same condition:
- // br COND, BBX, BBY
- // BBX:
- // br COND, BBZ, BBW
- if (!LVI &&
- !Condition->hasOneUse() && // Multiple uses.
- (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- if (isa<BranchInst>(BB->getTerminator())) {
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
- if (PBI->isConditional() && PBI->getCondition() == Condition &&
- ProcessBranchOnDuplicateCond(P, BB))
- return true;
- }
- } else {
- assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
- if (PSI->getCondition() == Condition &&
- ProcessSwitchOnDuplicateCond(P, BB))
- return true;
- }
- }
- }
-
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0) {
// FIXME: Unify this with code below.
- if (LVI && ProcessThreadableEdges(Condition, BB))
+ if (ProcessThreadableEdges(Condition, BB))
return true;
return false;
}
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (!LVI &&
- (!isa<PHINode>(CondCmp->getOperand(0)) ||
- cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
- // If we have a comparison, loop over the predecessors to see if there is
- // a condition with a lexically identical value.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
- if (PBI->isConditional() && P != BB) {
- if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
- if (CI->getOperand(0) == CondCmp->getOperand(0) &&
- CI->getOperand(1) == CondCmp->getOperand(1) &&
- CI->getPredicate() == CondCmp->getPredicate()) {
- // TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(P, BB))
- return true;
- }
- }
- }
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Use LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For each predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+ CondCmp->getOperand(0), CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
}
}
}
@@ -1020,6 +1093,7 @@
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
return false;
+
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1314,8 +1388,7 @@
<< ", across block:\n "
<< *BB << "\n");
- if (LVI)
- LVI->threadEdge(PredBB, BB, SuccBB);
+ LVI->threadEdge(PredBB, BB, SuccBB);
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
@@ -1386,7 +1459,7 @@
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
@@ -1402,7 +1475,7 @@
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
- RemovePredecessorAndSimplify(BB, PredBB, TD);
+ BB->removePredecessor(PredBB, true);
PredTerm->setSuccessor(i, NewBB);
}
@@ -1541,7 +1614,7 @@
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
@@ -1552,7 +1625,7 @@
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
- RemovePredecessorAndSimplify(BB, PredBB, TD);
+ BB->removePredecessor(PredBB, true);
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
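The two SSAUpdate.Initialize changes above track the interface moving from
Initialize(Value*) to Initialize(Type, Name). A minimal sketch of the whole
update sequence as this patch uses it (variable names match the surrounding
code):

    SmallVector<PHINode*, 8> NewPHIs;      // receives any PHIs the updater makes
    SSAUpdater SSAUpdate(&NewPHIs);
    SSAUpdate.Initialize(I->getType(), I->getName());
    SSAUpdate.AddAvailableValue(BB, I);                  // original definition
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); // duplicated definition
    // Then, for each use of I outside BB, SSAUpdate.RewriteUse(U) rewrites
    // the use, inserting PHI nodes at join points as needed.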
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LICM.cpp Tue Oct 26 19:48:03 2010
@@ -26,8 +26,7 @@
// pointer. There are no calls in the loop which mod/ref the pointer.
// If these conditions are true, we can promote the loads and stores in the
// loop of the pointer to use a temporary alloca'd variable. We then use
-// the mem2reg functionality to construct the appropriate SSA form for the
-// variable.
+// the SSAUpdater to construct the appropriate SSA form for the value.
//
//===----------------------------------------------------------------------===//
@@ -37,14 +36,16 @@
#include "llvm/DerivedTypes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -66,7 +67,9 @@
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(&ID) {}
+ LICM() : LoopPass(ID) {
+ initializeLICMPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -76,38 +79,30 @@
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg)
AU.addRequired<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominanceFrontier>();
AU.addPreservedID(LoopSimplifyID);
}
bool doFinalization() {
- // Free the values stored in the map
- for (std::map<Loop *, AliasSetTracker *>::iterator
- I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I)
- delete I->second;
-
- LoopToAliasMap.clear();
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
return false;
}
private:
- // Various analyses that we use...
AliasAnalysis *AA; // Current AliasAnalysis information
LoopInfo *LI; // Current LoopInfo
- DominatorTree *DT; // Dominator Tree for the current Loop...
- DominanceFrontier *DF; // Current Dominance Frontier
+ DominatorTree *DT; // Dominator Tree for the current Loop.
- // State that is updated as we process loops
+ // State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
- std::map<Loop *, AliasSetTracker *> LoopToAliasMap;
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
@@ -195,34 +190,26 @@
/// pointerInvalidatedByLoop - Return true if the body of this loop may
/// store into the memory location pointed to by V.
///
- bool pointerInvalidatedByLoop(Value *V, unsigned Size) {
+ bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const MDNode *TBAAInfo) {
// Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size).isMod();
+ return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
}
bool canSinkOrHoistInst(Instruction &I);
- bool isLoopInvariantInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- /// PromoteValuesInLoop - Look at the stores in the loop and promote as many
- /// to scalars as we can.
- ///
- void PromoteValuesInLoop();
-
- /// FindPromotableValuesInLoop - Check the current loop for stores to
- /// definite pointers, which are not loaded and stored through may aliases.
- /// If these are found, create an alloca for the value, add it to the
- /// PromotedValues list, and keep track of the mapping from value to
- /// alloca...
- ///
- void FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &Val2AlMap);
+ void PromoteAliasSet(AliasSet &AS);
};
}
char LICM::ID = 0;
-INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -236,19 +223,23 @@
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- DF = &getAnalysis<DominanceFrontier>();
DT = &getAnalysis<DominatorTree>();
CurAST = new AliasSetTracker(*AA);
- // Collect Alias info from subloops
+ // Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
LoopItr != LoopItrE; ++LoopItr) {
Loop *InnerL = *LoopItr;
- AliasSetTracker *InnerAST = LoopToAliasMap[InnerL];
- assert (InnerAST && "Where is my AST?");
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
// What if InnerLoop was modified by other passes ?
CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
}
CurLoop = L;
@@ -263,7 +254,7 @@
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops...
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -283,15 +274,24 @@
HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
- // memory references to scalars that we can...
- if (!DisablePromotion && Preheader && L->hasDedicatedExits())
- PromoteValuesInLoop();
-
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I);
+ }
+
// Clear out loops state information for the next iteration
CurLoop = 0;
Preheader = 0;
- LoopToAliasMap[L] = CurAST;
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
return Changed;
}
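The map rework above also tightens ownership of the per-loop trackers: a
subloop's AliasSetTracker is merged into the current loop's and freed on the
spot, and the current tracker is either handed up to the parent loop or freed
when there is no parent, which is what lets doFinalization assert the map is
empty. Compressed to its essentials:

    // Lifetime of AliasSetTracker objects after this change:
    AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
    CurAST->add(*InnerAST);          // fold subloop info into this loop's AST
    delete InnerAST;                 // then free it immediately
    LoopToAliasSetMap.erase(InnerL);
    // ...and at the end of runOnLoop:
    if (L->getParentLoop())
      LoopToAliasSetMap[L] = CurAST; // parent loop will consume and free it
    else
      delete CurAST;                 // outermost loop: nothing else owns it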
@@ -308,7 +308,7 @@
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
- // We are processing blocks in reverse dfo, so process children first...
+ // We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
SinkRegion(Children[i]);
@@ -319,6 +319,17 @@
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
+
+ // If the instruction is dead, we would otherwise try to sink it (since it
+ // isn't used in the loop); instead, just delete it.
+ if (isInstructionTriviallyDead(&I)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
// Check to see if we can sink this instruction to the exit blocks
// of the loop. We can do this if the all users of the instruction are
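Deleting instructions mid-walk works here only because of the iterator dance:
II was pre-decremented onto I, so it has to be stepped past I again before
the erase. The same discipline in isolation (a minimal sketch, not patch
code):

    for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
      Instruction &I = *--II;        // II now points at I
      if (isInstructionTriviallyDead(&I)) {
        ++II;                        // step off I so the iterator stays valid
        I.eraseFromParent();         // safe: II references the next node
      }
    }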
@@ -350,14 +361,26 @@
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
Instruction &I = *II++;
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
//
- if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
+ if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
isSafeToExecuteUnconditionally(I))
hoist(I);
- }
+ }
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
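The new fold-before-hoist step is worth calling out: once earlier hoisting
has made all of an instruction's operands constant, ConstantFoldInstruction
(which returns null on failure) lets us delete the instruction outright
instead of carrying it to the preheader. The alias tracker is told about the
replacement before the deletion so its records stay consistent:

    // Sketch of the fold-first pattern introduced above.
    if (Constant *C = ConstantFoldInstruction(&I)) {
      CurAST->copyValue(&I, C); // transfer alias info to the replacement first
      CurAST->deleteValue(&I);  // then drop the instruction from the tracker
      I.replaceAllUsesWith(C);
      I.eraseFromParent();
    }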
@@ -379,10 +402,11 @@
return true;
// Don't hoist loads which have may-aliased stores in loop.
- unsigned Size = 0;
+ uint64_t Size = 0;
if (LI->getType()->isSized())
Size = AA->getTypeStoreSize(LI->getType());
- return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+ LI->getMetadata(LLVMContext::MD_tbaa));
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Handle obvious cases efficiently.
AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
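With the widened Size type and the TBAA tag threaded through, the
load-hoisting legality check becomes a single TBAA-aware alias-set query.
Distilled from the hunk above:

    // Can this load be hoisted? Only if no store in the loop may alias it,
    // taking type-based alias info into account.
    uint64_t Size = 0;
    if (LI->getType()->isSized())
      Size = AA->getTypeStoreSize(LI->getType());
    const MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
    bool Invalidated =
      CurAST->getAliasSetForPointer(LI->getOperand(0), Size, TBAA).isMod();
    return !Invalidated;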
@@ -437,20 +461,6 @@
}
-/// isLoopInvariantInst - Return true if all operands of this instruction are
-/// loop invariant. We also filter out non-hoistable instructions here just for
-/// efficiency.
-///
-bool LICM::isLoopInvariantInst(Instruction &I) {
- // The instruction is loop invariant if all of its operands are loop-invariant
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (!CurLoop->isLoopInvariant(I.getOperand(i)))
- return false;
-
- // If we got this far, the instruction is loop invariant!
- return true;
-}
-
/// sink - When an instruction is found to only be used outside of the loop,
/// this function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
@@ -460,7 +470,7 @@
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -477,122 +487,101 @@
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
- I.removeFromParent();
- BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
- ExitBlocks[0]->getInstList().insert(InsertPt, &I);
+ I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
}
- } else if (ExitBlocks.empty()) {
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
// The instruction is actually dead if there ARE NO exit blocks.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
- } else {
- // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
- // do all of the hard work of inserting PHI nodes as necessary. We convert
- // the value into a stack object to get it to do this.
-
- // Firstly, we create a stack object to hold the value...
- AllocaInst *AI = 0;
-
- if (!I.getType()->isVoidTy()) {
- AI = new AllocaInst(I.getType(), 0, I.getName(),
- I.getParent()->getParent()->getEntryBlock().begin());
- CurAST->add(AI);
- }
-
- // Secondly, insert load instructions for each use of the instruction
- // outside of the loop.
- while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.use_back());
-
- // If the user is a PHI Node, we actually have to insert load instructions
- // in all predecessor blocks, not in the PHI block itself!
- if (PHINode *UPN = dyn_cast<PHINode>(U)) {
- // Only insert into each predecessor once, so that we don't have
- // different incoming values from the same block!
- std::map<BasicBlock*, Value*> InsertedBlocks;
- for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i)
- if (UPN->getIncomingValue(i) == &I) {
- BasicBlock *Pred = UPN->getIncomingBlock(i);
- Value *&PredVal = InsertedBlocks[Pred];
- if (!PredVal) {
- // Insert a new load instruction right before the terminator in
- // the predecessor block.
- PredVal = new LoadInst(AI, "", Pred->getTerminator());
- CurAST->add(cast<LoadInst>(PredVal));
- }
-
- UPN->setIncomingValue(i, PredVal);
- }
-
- } else {
- LoadInst *L = new LoadInst(AI, "", U);
- U->replaceUsesOfWith(&I, L);
- CurAST->add(L);
- }
- }
-
- // Thirdly, insert a copy of the instruction in each exit block of the loop
- // that is dominated by the instruction, storing the result into the memory
- // location. Be careful not to insert the instruction into any particular
- // basic block more than once.
- std::set<BasicBlock*> InsertedBlocks;
- BasicBlock *InstOrigBB = I.getParent();
-
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
-
- if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) {
- // If we haven't already processed this exit block, do so now.
- if (InsertedBlocks.insert(ExitBlock).second) {
- // Insert the code after the last PHI node...
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-
- // If this is the first exit block processed, just move the original
- // instruction, otherwise clone the original instruction and insert
- // the copy.
- Instruction *New;
- if (InsertedBlocks.size() == 1) {
- I.removeFromParent();
- ExitBlock->getInstList().insert(InsertPt, &I);
- New = &I;
- } else {
- New = I.clone();
- CurAST->copyValue(&I, New);
- if (!I.getName().empty())
- New->setName(I.getName()+".le");
- ExitBlock->getInstList().insert(InsertPt, New);
- }
-
- // Now that we have inserted the instruction, store it into the alloca
- if (AI) new StoreInst(New, AI, InsertPt);
- }
- }
- }
-
- // If the instruction doesn't dominate any exit blocks, it must be dead.
- if (InsertedBlocks.empty()) {
- CurAST->deleteValue(&I);
- I.eraseFromParent();
- }
-
- // Finally, promote the fine value to SSA form.
- if (AI) {
- std::vector<AllocaInst*> Allocas;
- Allocas.push_back(AI);
- PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
}
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
+ }
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
}
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
}
/// hoist - When an instruction is found to only use loop invariant operands
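Taken as a whole, the rewritten sink() replaces the old alloca/mem2reg detour
with direct SSA construction: move or clone I into each dominated exit block,
register every copy with SSAUpdater, then let it rewrite the remaining uses.
The skeleton, with the bookkeeping stripped out:

    SmallVector<PHINode*, 8> NewPHIs;
    SSAUpdater SSA(&NewPHIs);
    SSA.Initialize(I.getType(), I.getName());

    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
      // First dominated exit gets the original via moveBefore; later ones
      // get I.clone() with a ".le" suffix (see the full code above).
      Instruction *Copy = /* moved original or inserted clone */;
      SSA.AddAvailableValue(ExitBlocks[i], Copy);
    }

    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
      Use &U = UI.getUse();
      ++UI;                          // advance before the use list is edited
      SSA.RewriteUseAfterInsertions(U);
    }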
@@ -602,12 +591,8 @@
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
<< I << "\n");
- // Remove the instruction from its current basic block... but don't delete the
- // instruction.
- I.removeFromParent();
-
- // Insert the new node in Preheader, before the terminator.
- Preheader->getInstList().insert(Preheader->getTerminator(), &I);
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
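The hoist() change is a pure simplification: Instruction::moveBefore performs
the unlink-and-reinsert pair in one call, so there is no window where I has
no parent:

    // Old: two steps, instruction briefly parentless in between.
    I.removeFromParent();
    Preheader->getInstList().insert(Preheader->getTerminator(), &I);
    // New: one call with the same effect.
    I.moveBefore(Preheader->getTerminator());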
@@ -647,223 +632,269 @@
return true;
}
-
-/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop. We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
-/// which are loop invariant. We promote these memory locations to use allocas
-/// instead. These allocas can easily be raised to register values by the
-/// PromoteMem2Reg functionality.
+/// which are loop invariant.
///
-void LICM::PromoteValuesInLoop() {
- // PromotedValues - List of values that are promoted out of the loop. Each
- // value has an alloca instruction for it, and a canonical version of the
- // pointer.
- std::vector<std::pair<AllocaInst*, Value*> > PromotedValues;
- std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca
-
- FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap);
- if (ValueToAllocaMap.empty()) return; // If there are values to promote.
-
- Changed = true;
- NumPromoted += PromotedValues.size();
-
- std::vector<Value*> PointerValueNumbers;
-
- // Emit a copy from the value into the alloca'd value in the loop preheader
- TerminatorInst *LoopPredInst = Preheader->getTerminator();
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- Value *Ptr = PromotedValues[i].second;
-
- // If we are promoting a pointer value, update alias information for the
- // inserted load.
- Value *LoadValue = 0;
- if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) {
- // Locate a load or store through the pointer, and assign the same value
- // to LI as we are loading or storing. Since we know that the value is
- // stored in this loop, this will always succeed.
- for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- LoadValue = LI;
- break;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(1) == Ptr) {
- LoadValue = SI->getOperand(0);
- break;
- }
- }
- }
- assert(LoadValue && "No store through the pointer found!");
- PointerValueNumbers.push_back(LoadValue); // Remember this for later.
- }
-
- // Load from the memory we are promoting.
- LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst);
-
- if (LoadValue) CurAST->copyValue(LoadValue, LI);
-
- // Store into the temporary alloca.
- new StoreInst(LI, PromotedValues[i].first, LoopPredInst);
- }
+void LICM::PromoteAliasSet(AliasSet &AS) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
- // Scan the basic blocks in the loop, replacing uses of our pointers with
- // uses of the allocas in question.
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
//
- for (Loop::block_iterator I = CurLoop->block_begin(),
- E = CurLoop->block_end(); I != E; ++I) {
- BasicBlock *BB = *I;
- // Rewrite all loads and stores in the block of the pointer...
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(L->getOperand(0));
- if (I != ValueToAllocaMap.end())
- L->setOperand(0, I->second); // Rewrite load instruction...
- } else if (StoreInst *S = dyn_cast<StoreInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(S->getOperand(1));
- if (I != ValueToAllocaMap.end())
- S->setOperand(1, I->second); // Rewrite store instruction...
- }
- }
- }
-
- // Now that the body of the loop uses the allocas instead of the original
- // memory locations, insert code to copy the alloca value back into the
- // original memory location on all exits from the loop. Note that we only
- // want to insert one copy of the code in each exit block, though the loop may
- // exit to the same block more than once.
+ // for () { if (c) *P += 1; }
//
- SmallPtrSet<BasicBlock*, 16> ProcessedBlocks;
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- if (!ProcessedBlocks.insert(ExitBlocks[i]))
- continue;
-
- // Copy all of the allocas into their memory locations.
- BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI();
- Instruction *InsertPos = BI;
- unsigned PVN = 0;
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- // Load from the alloca.
- LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
-
- // If this is a pointer type, update alias info appropriately.
- if (LI->getType()->isPointerTy())
- CurAST->copyValue(PointerValueNumbers[PVN++], LI);
-
- // Store into the memory we promoted.
- new StoreInst(LI, PromotedValues[i].second, InsertPos);
- }
- }
-
- // Now that we have done the deed, use the mem2reg functionality to promote
- // all of the new allocas we just created into real SSA registers.
+ // into:
//
- std::vector<AllocaInst*> PromotedAllocas;
- PromotedAllocas.reserve(PromotedValues.size());
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
- PromotedAllocas.push_back(PromotedValues[i].first);
- PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
-}
-
-/// FindPromotableValuesInLoop - Check the current loop for stores to definite
-/// pointers, which are not loaded and stored through may aliases and are safe
-/// for promotion. If these are found, create an alloca for the value, add it
-/// to the PromotedValues list, and keep track of the mapping from value to
-/// alloca.
-void LICM::FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
- Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
- // Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
- // We can promote this alias set if it has a store, if it is a "Must" alias
- // set, if the pointer is loop invariant, and if we are not eliminating any
- // volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
- continue;
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
- assert(!AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
- Value *V = AS.begin()->getValue();
-
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
- {
- bool PointerOk = true;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- if (V->getType() != I->getValue()->getType()) {
- PointerOk = false;
- break;
- }
- if (!PointerOk)
- continue;
- }
-
- // It isn't safe to promote a load/store from the loop if the load/store is
- // conditional. For example, turning:
- //
- // for () { if (c) *P += 1; }
- //
- // into:
- //
- // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
- //
- // is not safe, because *P may only be valid to access if 'c' is true.
- //
- // It is safe to promote P if all uses are direct load/stores and if at
- // least one is guaranteed to be executed.
- bool GuaranteedToExecute = false;
- bool InvalidInst = false;
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
UI != UE; ++UI) {
- // Ignore instructions not in this loop.
+ // Ignore instructions that are outside the loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
if (!Use || !CurLoop->contains(Use))
continue;
-
- if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
- InvalidInst = true;
- break;
- }
+
+ // If there is a non-load/store instruction in the loop, we can't promote
+ // it.
+ if (isa<LoadInst>(Use))
+ assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+ else if (isa<StoreInst>(Use)) {
+ assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ if (Use->getOperand(0) == ASIV) return;
+ } else
+ return; // Not a load or store.
if (!GuaranteedToExecute)
GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+ LoopUses.push_back(Use);
}
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+ // Otherwise, this is safe to promote; let's do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
- // If there is an non-load/store instruction in the loop, we can't promote
- // it. If there isn't a guaranteed-to-execute instruction, we can't
- // promote.
- if (InvalidInst || !GuaranteedToExecute)
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ // SSAUpdater needs to know the type and name of the values we'll be
+ // inserting.
+ Value *SomeValue;
+ if (isa<LoadInst>(LoopUses[0]))
+ SomeValue = LoopUses[0];
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
+ SSA.Initialize(SomeValue->getType(), SomeValue->getName());
+
+ // First step: bucket up uses of the pointers by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the loop with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
+ Instruction *User = LoopUses[LoopUse];
+ std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (isa<StoreInst>(User)) {
+ SSA.AddAvailableValue(User->getParent(),
+ cast<StoreInst>(User)->getOperand(0));
+ } else {
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ }
+ BlockUses.clear();
continue;
+ }
+
+ // Otherwise, check to see if this block is all loads. If so, we can queue
+ // them all as live in loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
- const Type *Ty = cast<PointerType>(V->getType())->getElementType();
- AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
- PromotedValues.push_back(std::make_pair(AI, V));
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
- // Update the AST and alias analysis.
- CurAST->copyValue(V, AI);
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ BasicBlock *BB = User->getParent();
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(L->getOperand(0))) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(S->getOperand(1))) continue;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
+ // Remember that this is the active value in the block.
+ StoredValue = S->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Now that all the intra-loop values are classified, set up the preheader.
+ // It gets a load of the pointer we're promoting, and it is the live-out value
+ // from the preheader.
+ LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Now that the preheader is good to go, set up the exit blocks. Each exit
+ // block gets a store of the live-out value that feeds it. Since we've
+ // already told the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
- DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n");
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ ALoad->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(ALoad, NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // If the preheader load is itself a pointer, we need to tell alias analysis
+ // about the new pointer we created in the preheader block and about any PHI
+ // nodes that just got inserted.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ CurAST->copyValue(SomeValue, PreheaderLoad);
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(SomeValue, NewPHIs[i]);
}
+
+ // Now that everything is rewritten, delete the old instructions from the body
+ // of the loop. They should all be dead now.
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ User->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(User, NewVal);
+ }
+
+ CurAST->deleteValue(User);
+ User->eraseFromParent();
+ }
+
+ // Whew, we're done!
}
+
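The promotion rewrite above boils down to building SSA form for the memory
location directly: one load in the preheader supplies the live-in value,
every in-loop store becomes an SSA definition, and every exit block stores
the live-out value back. The three anchor points, using only calls that
appear in the patch:

    // 1. Preheader: materialize the live-in value once.
    LoadInst *PreheaderLoad =
      new LoadInst(SomePtr, SomePtr->getName()+".promoted",
                   Preheader->getTerminator());
    SSA.AddAvailableValue(Preheader, PreheaderLoad);

    // 2. In-loop: each 'store v, SomePtr' registers v as its block's def:
    //      SSA.AddAvailableValue(BB, StoredValue);
    //    and each load of SomePtr is queued for rewriting.

    // 3. Exits: write the live-out value back to memory.
    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
      new StoreInst(SSA.GetValueInMiddleOfBlock(ExitBlocks[i]), SomePtr,
                    ExitBlocks[i]->getFirstNonPHI());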
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
@@ -873,7 +904,7 @@
/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
/// set.
void LICM::deleteAnalysisValue(Value *V, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
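Note the switch from operator[] to lookup() in both analysis hooks. On a
DenseMap, operator[] default-constructs and inserts a null entry for a
missing key; lookup() returns the default value without mutating the map.
That difference is load-bearing now that doFinalization asserts
LoopToAliasSetMap is empty:

    DenseMap<Loop*, AliasSetTracker*> M;
    AliasSetTracker *A = M.lookup(L); // 0 if absent; M is untouched
    AliasSetTracker *B = M[L];        // inserts {L, 0} if absent; M grows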
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopDeletion.cpp Tue Oct 26 19:48:03 2010
@@ -28,7 +28,9 @@
class LoopDeletion : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopDeletion() : LoopPass(&ID) {}
+ LoopDeletion() : LoopPass(ID) {
+ initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+ }
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -55,8 +57,15 @@
}
char LoopDeletion::ID = 0;
-INITIALIZE_PASS(LoopDeletion, "loop-deletion",
- "Delete dead loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
Pass* llvm::createLoopDeletionPass() {
return new LoopDeletion();
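Both passes in this patch move from the one-shot INITIALIZE_PASS macro to the
BEGIN/DEPENDENCY/END form, which records the analyses a pass needs so the
registry can initialize them first, paired with an explicit initialize call
in the constructor. The general shape, for a hypothetical pass (names
illustrative):

    INITIALIZE_PASS_BEGIN(MyPass, "my-pass", "My Example Pass", false, false)
    INITIALIZE_PASS_DEPENDENCY(DominatorTree)  // ordinary pass dependencies
    INITIALIZE_PASS_DEPENDENCY(LoopInfo)
    INITIALIZE_AG_DEPENDENCY(AliasAnalysis)    // analysis-group dependency
    INITIALIZE_PASS_END(MyPass, "my-pass", "My Example Pass", false, false)

    // Constructor asks the registry to initialize this pass (and,
    // transitively, its dependencies) up front:
    MyPass() : LoopPass(ID) {
      initializeMyPassPass(*PassRegistry::getPassRegistry());
    }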
Removed: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopIndexSplit.cpp (removed)
@@ -1,1270 +0,0 @@
-//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements Loop Index Splitting Pass. This pass handles three
-// kinds of loops.
-//
-// [1] A loop may be eliminated if the body is executed exactly once.
-// For example,
-//
-// for (i = 0; i < N; ++i) {
-// if (i == X) {
-// body;
-// }
-// }
-//
-// is transformed to
-//
-// i = X;
-// body;
-//
-// [2] A loop's iteration space may be shrunk if the loop body is executed
-// for a proper sub-range of the loop's iteration space. For example,
-//
-// for (i = 0; i < N; ++i) {
-// if (i > A && i < B) {
-// ...
-// }
-// }
-//
-// is transformed to iterators from A to B, if A > 0 and B < N.
-//
-// [3] A loop may be split if the loop body is dominated by a branch.
-// For example,
-//
-// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; }
-//
-// is transformed into
-//
-// AEV = BSV = SV
-// for (i = LB; i < min(UB, AEV); ++i)
-// A;
-// for (i = max(LB, BSV); i < UB; ++i);
-// B;
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-index-split"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-STATISTIC(NumIndexSplit, "Number of loop index split");
-STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split");
-STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");
-
-namespace {
-
- class LoopIndexSplit : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopIndexSplit() : LoopPass(&ID) {}
-
- // Index split Loop L. Return true if loop is split.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<ScalarEvolution>();
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- }
-
- private:
- /// processOneIterationLoop -- Eliminate loop if loop body is executed
- /// only once. For example,
- /// for (i = 0; i < N; ++i) {
- /// if ( i == X) {
- /// ...
- /// }
- /// }
- ///
- bool processOneIterationLoop();
-
- // -- Routines used by updateLoopIterationSpace();
-
- /// updateLoopIterationSpace -- Update loop's iteration space if loop
- /// body is executed for certain IV range only. For example,
- ///
- /// for (i = 0; i < N; ++i) {
- /// if ( i > A && i < B) {
- /// ...
- /// }
- /// }
- /// is transformed to iterators from A to B, if A > 0 and B < N.
- ///
- bool updateLoopIterationSpace();
-
- /// restrictLoopBound - Op dominates loop body. Op compares an IV based value
- /// with a loop invariant value. Update loop's lower and upper bound based on
- /// the loop invariant value.
- bool restrictLoopBound(ICmpInst &Op);
-
- // --- Routines used by splitLoop(). --- /
-
- bool splitLoop();
-
- /// removeBlocks - Remove basic block DeadBB and all blocks dominated by
- /// DeadBB. This routine is used to remove split condition's dead branch,
- /// dominated by DeadBB. LiveBB dominates split conidition's other branch.
- void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB);
-
- /// moveExitCondition - Move exit condition EC into split condition block.
- void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
- BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC,
- PHINode *IV, Instruction *IVAdd, Loop *LP,
- unsigned);
-
- /// updatePHINodes - CFG has been changed.
- /// Before
- /// - ExitBB's single predecessor was Latch
- /// - Latch's second successor was Header
- /// Now
- /// - ExitBB's single predecessor was Header
- /// - Latch's one and only successor was Header
- ///
- /// Update ExitBB PHINodes' to reflect this change.
- void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
- BasicBlock *Header,
- PHINode *IV, Instruction *IVIncrement, Loop *LP);
-
- // --- Utility routines --- /
-
- /// cleanBlock - A block is considered clean if all non terminal
- /// instructions are either PHINodes or IV based values.
- bool cleanBlock(BasicBlock *BB);
-
- /// IVisLT - If Op is comparing IV based value with an loop invariant and
- /// IV based value is less than the loop invariant then return the loop
- /// invariant. Otherwise return NULL.
- Value * IVisLT(ICmpInst &Op);
-
- /// IVisLE - If Op is comparing IV based value with an loop invariant and
- /// IV based value is less than or equal to the loop invariant then
- /// return the loop invariant. Otherwise return NULL.
- Value * IVisLE(ICmpInst &Op);
-
- /// IVisGT - If Op is comparing IV based value with an loop invariant and
- /// IV based value is greater than the loop invariant then return the loop
- /// invariant. Otherwise return NULL.
- Value * IVisGT(ICmpInst &Op);
-
- /// IVisGE - If Op is comparing IV based value with an loop invariant and
- /// IV based value is greater than or equal to the loop invariant then
- /// return the loop invariant. Otherwise return NULL.
- Value * IVisGE(ICmpInst &Op);
-
- private:
-
- // Current Loop information.
- Loop *L;
- LPPassManager *LPM;
- LoopInfo *LI;
- DominatorTree *DT;
- DominanceFrontier *DF;
-
- PHINode *IndVar;
- ICmpInst *ExitCondition;
- ICmpInst *SplitCondition;
- Value *IVStartValue;
- Value *IVExitValue;
- Instruction *IVIncrement;
- SmallPtrSet<Value *, 4> IVBasedValues;
- };
-}
-
-char LoopIndexSplit::ID = 0;
-INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
- "Index Split Loops", false, false);
-
-Pass *llvm::createLoopIndexSplitPass() {
- return new LoopIndexSplit();
-}
-
-// Index split Loop L. Return true if loop is split.
-bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
- L = IncomingLoop;
- LPM = &LPM_Ref;
-
- // If LoopSimplify form is not available, stay out of trouble.
- if (!L->isLoopSimplifyForm())
- return false;
-
- // FIXME - Nested loops make dominator info updates tricky.
- if (!L->getSubLoops().empty())
- return false;
-
- DT = &getAnalysis<DominatorTree>();
- LI = &getAnalysis<LoopInfo>();
- DF = &getAnalysis<DominanceFrontier>();
-
- // Initialize loop data.
- IndVar = L->getCanonicalInductionVariable();
- if (!IndVar) return false;
-
- bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
- IVStartValue = IndVar->getIncomingValue(!P1InLoop);
- IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
- if (!IVIncrement) return false;
-
- IVBasedValues.clear();
- IVBasedValues.insert(IndVar);
- IVBasedValues.insert(IVIncrement);
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
- for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end();
- BI != BE; ++BI) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI))
- if (BO != IVIncrement
- && (BO->getOpcode() == Instruction::Add
- || BO->getOpcode() == Instruction::Sub))
- if (IVBasedValues.count(BO->getOperand(0))
- && L->isLoopInvariant(BO->getOperand(1)))
- IVBasedValues.insert(BO);
- }
-
- // Reject loop if loop exit condition is not suitable.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return false;
- BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!EBR) return false;
- ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
- if (!ExitCondition) return false;
- if (ExitingBlock != L->getLoopLatch()) return false;
- IVExitValue = ExitCondition->getOperand(1);
- if (!L->isLoopInvariant(IVExitValue))
- IVExitValue = ExitCondition->getOperand(0);
- if (!L->isLoopInvariant(IVExitValue))
- return false;
- if (!IVBasedValues.count(
- ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0))))
- return false;
-
- // If start value is more then exit value where induction variable
- // increments by 1 then we are potentially dealing with an infinite loop.
- // Do not index split this loop.
- if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
- if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
- if (SV->getSExtValue() > EV->getSExtValue())
- return false;
-
- if (processOneIterationLoop())
- return true;
-
- if (updateLoopIterationSpace())
- return true;
-
- if (splitLoop())
- return true;
-
- return false;
-}
-
-// --- Helper routines ---
-// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
-static bool isUsedOutsideLoop(Value *V, Loop *L) {
- for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (!L->contains(cast<Instruction>(*UI)))
- return true;
- return false;
-}
-
-// Return V+1
-static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext &Context) {
- Constant *One = ConstantInt::get(V->getType(), 1, Sign);
- return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
-}
-
-// Return V-1
-static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext &Context) {
- Constant *One = ConstantInt::get(V->getType(), 1, Sign);
- return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
-}
-
-// Return min(V1, V1)
-static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
-
- Value *C = new ICmpInst(InsertPt,
- Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp");
- return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
-}
-
-// Return max(V1, V2)
-static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
-
- Value *C = new ICmpInst(InsertPt,
- Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp");
- return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
-}
-
-/// processOneIterationLoop -- Eliminate loop if loop body is executed
-/// only once. For example,
-/// for (i = 0; i < N; ++i) {
-/// if ( i == X) {
-/// ...
-/// }
-/// }
-///
-bool LoopIndexSplit::processOneIterationLoop() {
- SplitCondition = NULL;
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Header = L->getHeader();
- BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
- if (!BR) return false;
- if (!isa<BranchInst>(Latch->getTerminator())) return false;
- if (BR->isUnconditional()) return false;
- SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
- if (!SplitCondition) return false;
- if (SplitCondition == ExitCondition) return false;
- if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
- if (BR->getOperand(1) != Latch) return false;
- if (!IVBasedValues.count(SplitCondition->getOperand(0))
- && !IVBasedValues.count(SplitCondition->getOperand(1)))
- return false;
-
- // If IV is used outside the loop then this loop traversal is required.
- // FIXME: Calculate and use last IV value.
- if (isUsedOutsideLoop(IVIncrement, L))
- return false;
-
- // If BR operands are not IV or not loop invariants then skip this loop.
- Value *OPV = SplitCondition->getOperand(0);
- Value *SplitValue = SplitCondition->getOperand(1);
- if (!L->isLoopInvariant(SplitValue))
- std::swap(OPV, SplitValue);
- if (!L->isLoopInvariant(SplitValue))
- return false;
- Instruction *OPI = dyn_cast<Instruction>(OPV);
- if (!OPI)
- return false;
- if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
- return false;
- Value *StartValue = IVStartValue;
- Value *ExitValue = IVExitValue;;
-
- if (OPV != IndVar) {
- // If BR operand is IV based then use this operand to calculate
- // effective conditions for loop body.
- BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
- if (!BOPV)
- return false;
- if (BOPV->getOpcode() != Instruction::Add)
- return false;
- StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
- ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
- }
-
- if (!cleanBlock(Header))
- return false;
-
- if (!cleanBlock(Latch))
- return false;
-
- // If the merge point for BR is not loop latch then skip this loop.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- return false;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- return false;
- }
-
- // Now, Current loop L contains compare instruction
- // that compares induction variable, IndVar, against loop invariant. And
- // entire (i.e. meaningful) loop body is dominated by this compare
- // instruction. In such case eliminate
- // loop structure surrounding this loop body. For example,
- // for (int i = start; i < end; ++i) {
- // if ( i == somevalue) {
- // loop_body
- // }
- // }
- // can be transformed into
- // if (somevalue >= start && somevalue < end) {
- // i = somevalue;
- // loop_body
- // }
-
- // Replace index variable with split value in loop body. Loop body is executed
- // only when index variable is equal to split value.
- IndVar->replaceAllUsesWith(SplitValue);
-
- // Replace split condition in header.
- // Transform
- // SplitCondition : icmp eq i32 IndVar, SplitValue
- // into
- // c1 = icmp uge i32 SplitValue, StartValue
- // c2 = icmp ult i32 SplitValue, ExitValue
- // and i32 c1, c2
- Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ?
- ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
- SplitValue, StartValue, "lisplit");
-
- CmpInst::Predicate C2P = ExitCondition->getPredicate();
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- if (LatchBR->getOperand(1) != Header)
- C2P = CmpInst::getInversePredicate(C2P);
- Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
- Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
-
- SplitCondition->replaceAllUsesWith(NSplitCond);
- SplitCondition->eraseFromParent();
-
- // Remove Latch to Header edge.
- BasicBlock *LatchSucc = NULL;
- Header->removePredecessor(Latch);
- for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
- SI != E; ++SI) {
- if (Header != *SI)
- LatchSucc = *SI;
- }
-
- // Clean up latch block.
- Value *LatchBRCond = LatchBR->getCondition();
- LatchBR->setUnconditionalDest(LatchSucc);
- RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
-
- LPM->deleteLoopFromQueue(L);
-
- // Update Dominator Info.
- // Only CFG change done is to remove Latch to Header edge. This
- // does not change dominator tree because Latch did not dominate
- // Header.
- if (DF) {
- DominanceFrontier::iterator HeaderDF = DF->find(Header);
- if (HeaderDF != DF->end())
- DF->removeFromFrontier(HeaderDF, Header);
-
- DominanceFrontier::iterator LatchDF = DF->find(Latch);
- if (LatchDF != DF->end())
- DF->removeFromFrontier(LatchDF, Header);
- }
-
- ++NumIndexSplitRemoved;
- return true;
-}
-
-/// restrictLoopBound - Op dominates loop body. Op compares an IV based value
-/// with a loop invariant value. Update loop's lower and upper bound based on
-/// the loop invariant value.
-bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
- bool Sign = Op.isSigned();
- Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
- if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
- BranchInst *EBR =
- cast<BranchInst>(ExitCondition->getParent()->getTerminator());
- ExitCondition->setPredicate(ExitCondition->getInversePredicate());
- BasicBlock *T = EBR->getSuccessor(0);
- EBR->setSuccessor(0, EBR->getSuccessor(1));
- EBR->setSuccessor(1, T);
- }
-
- LLVMContext &Context = Op.getContext();
-
- // New upper and lower bounds.
- Value *NLB = NULL;
- Value *NUB = NULL;
- if (Value *V = IVisLT(Op)) {
- // Restrict upper bound.
- if (IVisLE(*ExitCondition))
- V = getMinusOne(V, Sign, PHTerm, Context);
- NUB = getMin(V, IVExitValue, Sign, PHTerm);
- } else if (Value *V = IVisLE(Op)) {
- // Restrict upper bound.
- if (IVisLT(*ExitCondition))
- V = getPlusOne(V, Sign, PHTerm, Context);
- NUB = getMin(V, IVExitValue, Sign, PHTerm);
- } else if (Value *V = IVisGT(Op)) {
- // Restrict lower bound.
- V = getPlusOne(V, Sign, PHTerm, Context);
- NLB = getMax(V, IVStartValue, Sign, PHTerm);
- } else if (Value *V = IVisGE(Op))
- // Restrict lower bound.
- NLB = getMax(V, IVStartValue, Sign, PHTerm);
-
- if (!NLB && !NUB)
- return false;
-
- if (NLB) {
- unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader());
- IndVar->setIncomingValue(i, NLB);
- }
-
- if (NUB) {
- unsigned i = (ExitCondition->getOperand(0) != IVExitValue);
- ExitCondition->setOperand(i, NUB);
- }
- return true;
-}
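// Editor's sketch: the four cases above reduce to interval arithmetic on the
// bounds. getMin/getMax/getPlusOne/getMinusOne build IR in this pass; here
// they are ordinary integer operations on hypothetical unsigned values.
#include <algorithm>
// For a loop "for (i = Start; i < Exit; ++i)" guarded by "i <= B && i > A":
static unsigned newUpperBound(unsigned B, unsigned Exit) {
  return std::min(B + 1, Exit);  // IVisLE(Op) under an IVisLT exit: V = B + 1
}
static unsigned newLowerBound(unsigned A, unsigned Start) {
  return std::max(A + 1, Start); // IVisGT(Op): V = A + 1
}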
-
-/// updateLoopIterationSpace -- Update loop's iteration space if loop
-/// body is executed for certain IV range only. For example,
-///
-/// for (i = 0; i < N; ++i) {
-/// if ( i > A && i < B) {
-/// ...
-/// }
-/// }
-/// is transformed to iterators from A to B, if A > 0 and B < N.
-///
-bool LoopIndexSplit::updateLoopIterationSpace() {
- SplitCondition = NULL;
- if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
- || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
- return false;
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Header = L->getHeader();
- BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
- if (!BR) return false;
- if (!isa<BranchInst>(Latch->getTerminator())) return false;
- if (BR->isUnconditional()) return false;
- BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
- if (!AND) return false;
- if (AND->getOpcode() != Instruction::And) return false;
- ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
- ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
- if (!Op0 || !Op1)
- return false;
- IVBasedValues.insert(AND);
- IVBasedValues.insert(Op0);
- IVBasedValues.insert(Op1);
- if (!cleanBlock(Header)) return false;
- BasicBlock *ExitingBlock = ExitCondition->getParent();
- if (!cleanBlock(ExitingBlock)) return false;
-
- // If the merge point for BR is not loop latch then skip this loop.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- return false;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- return false;
- }
-
- // Verify that the loop exiting block has only two predecessors, where one
- // is the split condition block. The other predecessor will become the
- // exiting block's dominator after the CFG is updated. TODO: Handle CFGs
- // where the exiting block has more than two predecessors. This requires
- // extra work in updating dominator information.
- BasicBlock *ExitingBBPred = NULL;
- for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
- PI != PE; ++PI) {
- BasicBlock *BB = *PI;
- if (Header == BB)
- continue;
- if (ExitingBBPred)
- return false;
- else
- ExitingBBPred = BB;
- }
-
- if (!restrictLoopBound(*Op0))
- return false;
-
- if (!restrictLoopBound(*Op1))
- return false;
-
- // Update CFG.
- if (BR->getSuccessor(0) == ExitingBlock)
- BR->setUnconditionalDest(BR->getSuccessor(1));
- else
- BR->setUnconditionalDest(BR->getSuccessor(0));
-
- AND->eraseFromParent();
- if (Op0->use_empty())
- Op0->eraseFromParent();
- if (Op1->use_empty())
- Op1->eraseFromParent();
-
- // Update dominator info. Now, ExitingBlock has only one predecessor,
- // ExitingBBPred, and it is ExitingBlock's immediate dominator.
- DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);
-
- BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
- if (L->contains(ExitBlock))
- ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);
-
- // If ExitingBlock is a member of the loop basic blocks' DF list then
- // replace ExitingBlock with header and exit block in the DF list
- DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- if (BB == Header || BB == ExitingBlock)
- continue;
- DominanceFrontier::iterator BBDF = DF->find(BB);
- DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
- DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
- while (DomSetI != DomSetE) {
- DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
- ++DomSetI;
- BasicBlock *DFBB = *CurrentItr;
- if (DFBB == ExitingBlock) {
- BBDF->second.erase(DFBB);
- for (DominanceFrontier::DomSetType::iterator
- EBI = ExitingBlockDF->second.begin(),
- EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI)
- BBDF->second.insert(*EBI);
- }
- }
- }
- ++NumRestrictBounds;
- return true;
-}
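// Editor's worked example for updateLoopIterationSpace (hypothetical values):
// with both restrictLoopBound calls above applied to the operands of the AND,
//
//   for (unsigned i = 0; i < N; ++i)
//     if (i > A && i < B)
//       body(i);
//
// becomes, conceptually,
//
//   for (unsigned i = std::max(A + 1, 0u); i < std::min(B, N); ++i)
//     body(i);
//
// once the conditional branch is folded away by setUnconditionalDest above.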
-
-/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
-/// This routine is used to remove split condition's dead branch, dominated by
-/// DeadBB. LiveBB dominates the split condition's other branch.
-void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
- BasicBlock *LiveBB) {
-
- // First update DeadBB's dominance frontier.
- SmallVector<BasicBlock *, 8> FrontierBBs;
- DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
- if (DeadBBDF != DF->end()) {
- SmallVector<BasicBlock *, 8> PredBlocks;
-
- DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
- for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
- DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI)
- {
- BasicBlock *FrontierBB = *DeadBBSetI;
- FrontierBBs.push_back(FrontierBB);
-
- // Remove any PHI incoming edge from blocks dominated by DeadBB.
- PredBlocks.clear();
- for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (DT->dominates(DeadBB, P))
- PredBlocks.push_back(P);
- }
-
- for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
- FBI != FBE; ++FBI) {
- if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
- for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
- PE = PredBlocks.end(); PI != PE; ++PI) {
- BasicBlock *P = *PI;
- PN->removeIncomingValue(P);
- }
- }
- else
- break;
- }
- }
- }
-
- // Now remove DeadBB and all nodes dominated by DeadBB in df order.
- SmallVector<BasicBlock *, 32> WorkList;
- DomTreeNode *DN = DT->getNode(DeadBB);
- for (df_iterator<DomTreeNode*> DI = df_begin(DN),
- E = df_end(DN); DI != E; ++DI) {
- BasicBlock *BB = DI->getBlock();
- WorkList.push_back(BB);
- BB->replaceAllUsesWith(UndefValue::get(
- Type::getLabelTy(DeadBB->getContext())));
- }
-
- while (!WorkList.empty()) {
- BasicBlock *BB = WorkList.pop_back_val();
- LPM->deleteSimpleAnalysisValue(BB, LP);
- for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
- BBI != BBE; ) {
- Instruction *I = BBI;
- ++BBI;
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- LPM->deleteSimpleAnalysisValue(I, LP);
- I->eraseFromParent();
- }
- DT->eraseNode(BB);
- DF->removeBlock(BB);
- LI->removeBlock(BB);
- BB->eraseFromParent();
- }
-
- // Update Frontier BBs' dominator info.
- while (!FrontierBBs.empty()) {
- BasicBlock *FBB = FrontierBBs.pop_back_val();
- BasicBlock *NewDominator = FBB->getSinglePredecessor();
- if (!NewDominator) {
- pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
- NewDominator = *PI;
- ++PI;
- if (NewDominator != LiveBB) {
- for(; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (P == LiveBB) {
- NewDominator = LiveBB;
- break;
- }
- NewDominator = DT->findNearestCommonDominator(NewDominator, P);
- }
- }
- }
- assert (NewDominator && "Unable to fix dominator info.");
- DT->changeImmediateDominator(FBB, NewDominator);
- DF->changeImmediateDominator(FBB, NewDominator, DT);
- }
-
-}
-
-/// moveExitCondition - Move exit condition EC into split condition block CondBB.
-void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
- BasicBlock *ExitBB, ICmpInst *EC,
- ICmpInst *SC, PHINode *IV,
- Instruction *IVAdd, Loop *LP,
- unsigned ExitValueNum) {
-
- BasicBlock *ExitingBB = EC->getParent();
- Instruction *CurrentBR = CondBB->getTerminator();
-
- // Move exit condition into split condition block.
- EC->moveBefore(CurrentBR);
- EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV);
-
- // Move exiting block's branch into split condition block. Update its branch
- // destination.
- BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator());
- ExitingBR->moveBefore(CurrentBR);
- BasicBlock *OrigDestBB = NULL;
- if (ExitingBR->getSuccessor(0) == ExitBB) {
- OrigDestBB = ExitingBR->getSuccessor(1);
- ExitingBR->setSuccessor(1, ActiveBB);
- }
- else {
- OrigDestBB = ExitingBR->getSuccessor(0);
- ExitingBR->setSuccessor(0, ActiveBB);
- }
-
- // Remove split condition and current split condition branch.
- SC->eraseFromParent();
- CurrentBR->eraseFromParent();
-
- // Connect exiting block to original destination.
- BranchInst::Create(OrigDestBB, ExitingBB);
-
- // Update PHINodes
- updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP);
-
- // Fix dominator info.
- // ExitBB is now dominated by CondBB
- DT->changeImmediateDominator(ExitBB, CondBB);
- DF->changeImmediateDominator(ExitBB, CondBB, DT);
-
- // Blocks outside the loop may have been in the dominance frontier of blocks
- // inside the condition; this is now impossible because the blocks inside the
- // condition no longer dominate the exit. Remove the relevant blocks from
- // the dominance frontiers.
- for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
- I != E; ++I) {
- if (!DT->properlyDominates(CondBB, *I)) continue;
- DominanceFrontier::iterator BBDF = DF->find(*I);
- DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
- DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
- while (DomSetI != DomSetE) {
- DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
- ++DomSetI;
- BasicBlock *DFBB = *CurrentItr;
- if (!LP->contains(DFBB))
- BBDF->second.erase(DFBB);
- }
- }
-}
-
-/// updatePHINodes - CFG has been changed.
-/// Before
-/// - ExitBB's single predecessor was Latch
-/// - Latch's second successor was Header
-/// Now
-/// - ExitBB's single predecessor is Header
-/// - Latch's one and only successor is Header
-///
-/// Update ExitBB's PHINodes to reflect this change.
-void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
- BasicBlock *Header,
- PHINode *IV, Instruction *IVIncrement,
- Loop *LP) {
-
- for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end();
- BI != BE; ) {
- PHINode *PN = dyn_cast<PHINode>(BI);
- ++BI;
- if (!PN)
- break;
-
- Value *V = PN->getIncomingValueForBlock(Latch);
- if (PHINode *PHV = dyn_cast<PHINode>(V)) {
- // PHV is in Latch. PHV has one use in the ExitBB PHINode and one use
- // in Header, which is the new incoming value for PN.
- Value *NewV = NULL;
- for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end();
- UI != E; ++UI)
- if (PHINode *U = dyn_cast<PHINode>(*UI))
- if (LP->contains(U)) {
- NewV = U;
- break;
- }
-
- // Add incoming value from header only if PN has any use inside the loop.
- if (NewV)
- PN->addIncoming(NewV, Header);
-
- } else if (Instruction *PHI = dyn_cast<Instruction>(V)) {
- // If this instruction is IVIncrement then IV is new incoming value
- // from header otherwise this instruction must be incoming value from
- // header because loop is in LCSSA form.
- if (PHI == IVIncrement)
- PN->addIncoming(IV, Header);
- else
- PN->addIncoming(V, Header);
- } else
- // Otherwise this is an incoming value from header because loop is in
- // LCSSA form.
- PN->addIncoming(V, Header);
-
- // Remove incoming value from Latch.
- PN->removeIncomingValue(Latch);
- }
-}
-
-bool LoopIndexSplit::splitLoop() {
- SplitCondition = NULL;
- if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
- || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
- return false;
- BasicBlock *Header = L->getHeader();
- BasicBlock *Latch = L->getLoopLatch();
- BranchInst *SBR = NULL; // Split Condition Branch
- BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
- // If Exiting block includes loop variant instructions then this
- // loop may not be split safely.
- BasicBlock *ExitingBlock = ExitCondition->getParent();
- if (!cleanBlock(ExitingBlock)) return false;
-
- LLVMContext &Context = Header->getContext();
-
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
- if (!BR || BR->isUnconditional()) continue;
- ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
- if (!CI || CI == ExitCondition
- || CI->getPredicate() == ICmpInst::ICMP_NE
- || CI->getPredicate() == ICmpInst::ICMP_EQ)
- continue;
-
- // Unable to handle triangle loops at the moment.
- // In a triangle loop, the split condition is in the header and one of
- // the split destinations is the loop latch. If the split condition is EQ
- // then such loops are already handled in processOneIterationLoop().
- if (Header == (*I)
- && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
- continue;
-
- // If the block does not dominate the latch then this is not a diamond.
- // Such a loop may not benefit from index splitting.
- if (!DT->dominates((*I), Latch))
- continue;
-
- // If the split condition branch's successors do not have a single
- // predecessor, SplitCondBlock, then it is not possible to remove the
- // inactive branch.
- if (!BR->getSuccessor(0)->getSinglePredecessor()
- || !BR->getSuccessor(1)->getSinglePredecessor())
- return false;
-
- // If the merge point for BR is not loop latch then skip this condition.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- continue;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- continue;
- }
- SplitCondition = CI;
- SBR = BR;
- break;
- }
-
- if (!SplitCondition)
- return false;
-
- // If the predicate sign does not match then skip.
- if (ExitCondition->isSigned() != SplitCondition->isSigned())
- return false;
-
- unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
- unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
- Value *SplitValue = SplitCondition->getOperand(SVOpNum);
- if (!L->isLoopInvariant(SplitValue))
- return false;
- if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
- return false;
-
- // Check for side effects.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
-
- assert(DT->dominates(Header, BB));
- if (DT->properlyDominates(SplitCondition->getParent(), BB))
- continue;
-
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- Instruction *Inst = BI;
-
- if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
- && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
- return false;
- }
- }
-
- // Normalize loop conditions so that it is easier to calculate new loop
- // bounds.
- if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
- ExitCondition->setPredicate(ExitCondition->getInversePredicate());
- BasicBlock *T = EBR->getSuccessor(0);
- EBR->setSuccessor(0, EBR->getSuccessor(1));
- EBR->setSuccessor(1, T);
- }
-
- if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
- SplitCondition->setPredicate(SplitCondition->getInversePredicate());
- BasicBlock *T = SBR->getSuccessor(0);
- SBR->setSuccessor(0, SBR->getSuccessor(1));
- SBR->setSuccessor(1, T);
- }
-
- //[*] Calculate new loop bounds.
- Value *AEV = SplitValue;
- Value *BSV = SplitValue;
- bool Sign = SplitCondition->isSigned();
- Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
- if (IVisLT(*ExitCondition)) {
- if (IVisLT(*SplitCondition)) {
- /* Do nothing */
- }
- else if (IVisLE(*SplitCondition)) {
- AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- } else {
- assert (0 && "Unexpected split condition!");
- }
- }
- else if (IVisLE(*ExitCondition)) {
- if (IVisLT(*SplitCondition)) {
- AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
- }
- else if (IVisLE(*SplitCondition)) {
- BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- } else {
- assert (0 && "Unexpected split condition!");
- }
- } else {
- assert (0 && "Unexpected exit condition!");
- }
- AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
- BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
-
- // [*] Clone Loop
- ValueMap<const Value *, Value *> VMap;
- Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
- Loop *ALoop = L;
-
- // [*] ALoop's exiting edge enters BLoop's header.
- // ALoop's original exit block becomes BLoop's exit block.
- PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
- BasicBlock *A_ExitingBlock = ExitCondition->getParent();
- BranchInst *A_ExitInsn =
- dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
- assert (A_ExitInsn && "Unable to find suitable loop exit branch");
- BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
- BasicBlock *B_Header = BLoop->getHeader();
- if (ALoop->contains(B_ExitBlock)) {
- B_ExitBlock = A_ExitInsn->getSuccessor(0);
- A_ExitInsn->setSuccessor(0, B_Header);
- } else
- A_ExitInsn->setSuccessor(1, B_Header);
-
- // [*] Update ALoop's exit value using new exit value.
- ExitCondition->setOperand(EVOpNum, AEV);
-
- // [*] Update BLoop's header phi nodes. Remove incoming PHINode values
- // from the original loop's preheader. Add incoming PHINode values from
- // ALoop's exiting block. Update BLoop header's dominator info.
-
- // Collect inverse map of Header PHINodes.
- DenseMap<Value *, Value *> InverseMap;
- for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
- BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PHINode *PNClone = cast<PHINode>(VMap[PN]);
- InverseMap[PNClone] = PN;
- } else
- break;
- }
-
- BasicBlock *A_Preheader = ALoop->getLoopPreheader();
- for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- // Remove incoming value from original preheader.
- PN->removeIncomingValue(A_Preheader);
-
- // Add incoming value from A_ExitingBlock.
- if (PN == B_IndVar)
- PN->addIncoming(BSV, A_ExitingBlock);
- else {
- PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
- Value *V2 = NULL;
- // If loop header is also loop exiting block then
- // OrigPN is incoming value for B loop header.
- if (A_ExitingBlock == ALoop->getHeader())
- V2 = OrigPN;
- else
- V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
- PN->addIncoming(V2, A_ExitingBlock);
- }
- } else
- break;
- }
-
- DT->changeImmediateDominator(B_Header, A_ExitingBlock);
- DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
-
- // [*] Update BLoop's exit block. Its new predecessor is BLoop's exiting
- // block. Remove incoming PHINode values from ALoop's exiting block and
- // add the corresponding values incoming from BLoop's exiting block.
- // Update BLoop exit block's dominator info.
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
- for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
- B_ExitingBlock);
- PN->removeIncomingValue(A_ExitingBlock);
- } else
- break;
- }
-
- DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
- DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
-
- //[*] Split ALoop's exit edge. This creates a new block which
- // serves two purposes. The first is to hold PHINode definitions
- // that preserve ALoop's LCSSA form. The second is to act
- // as a preheader for BLoop.
- BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
-
- //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
- // in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
- for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
- PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
- newPHI->addIncoming(V1, A_ExitingBlock);
- A_ExitBlock->getInstList().push_front(newPHI);
- PN->removeIncomingValue(A_ExitBlock);
- PN->addIncoming(newPHI, A_ExitBlock);
- } else
- break;
- }
-
- //[*] Eliminate split condition's inactive branch from ALoop.
- BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
- BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
- BasicBlock *A_InactiveBranch = NULL;
- BasicBlock *A_ActiveBranch = NULL;
- A_ActiveBranch = A_BR->getSuccessor(0);
- A_InactiveBranch = A_BR->getSuccessor(1);
- A_BR->setUnconditionalDest(A_ActiveBranch);
- removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
-
- //[*] Eliminate split condition's inactive branch from BLoop.
- BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
- BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
- BasicBlock *B_InactiveBranch = NULL;
- BasicBlock *B_ActiveBranch = NULL;
- B_ActiveBranch = B_BR->getSuccessor(1);
- B_InactiveBranch = B_BR->getSuccessor(0);
- B_BR->setUnconditionalDest(B_ActiveBranch);
- removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
-
- BasicBlock *A_Header = ALoop->getHeader();
- if (A_ExitingBlock == A_Header)
- return true;
-
- //[*] Move exit condition into split condition block to avoid
- // executing dead loop iteration.
- ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
- Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
- ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
-
- moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
- cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
- ALoop, EVOpNum);
-
- moveExitCondition(B_SplitCondBlock, B_ActiveBranch,
- B_ExitBlock, B_ExitCondition,
- B_SplitCondition, B_IndVar, B_IndVarIncrement,
- BLoop, EVOpNum);
-
- ++NumIndexSplit;
- return true;
-}
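// Editor's sketch of the end-to-end splitLoop() effect on a source-level
// loop; all names here are hypothetical stand-ins.
#include <algorithm>
extern void f(int), g(int); // the two guarded bodies

static void beforeSplit(int Start, int End, int SV) {
  for (int i = Start; i < End; ++i) {
    if (i < SV) f(i); else g(i);   // SplitCondition: i < SV
  }
}
static void afterSplit(int Start, int End, int SV) {
  int AEV = std::min(SV, End);     // ALoop's new exit value (AEV above)
  int BSV = std::max(SV, Start);   // BLoop's new start value (BSV above)
  for (int i = Start; i < AEV; ++i) f(i); // ALoop: inactive branch removed
  for (int i = BSV; i < End; ++i) g(i);   // BLoop: inactive branch removed
}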
-
-/// cleanBlock - A block is considered clean if all non-terminator instructions
-/// are PHI nodes, IV based values, or side-effect free values used only
-/// within the block.
-bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
- Instruction *Terminator = BB->getTerminator();
- for(BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- Instruction *I = BI;
-
- if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
- || I == SplitCondition || IVBasedValues.count(I)
- || isa<DbgInfoIntrinsic>(I))
- continue;
-
- if (I->mayHaveSideEffects())
- return false;
-
- // If I is used only inside this block then it is OK.
- bool usedOutsideBB = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- Instruction *U = cast<Instruction>(*UI);
- if (U->getParent() != BB)
- usedOutsideBB = true;
- }
- if (!usedOutsideBB)
- continue;
-
- // Otherwise we have an instruction that may not allow loop splitting.
- return false;
- }
- return true;
-}
-
-/// IVisLT - If Op compares an IV based value with a loop invariant, and the
-/// IV based value is less than the loop invariant, then return the loop
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisLE - If Op compares an IV based value with a loop invariant, and the
-/// IV based value is less than or equal to the loop invariant, then
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisGT - If Op compares an IV based value with a loop invariant, and the
-/// IV based value is greater than the loop invariant, then return the loop
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisGE - If Op compares an IV based value with a loop invariant, and the
-/// IV based value is greater than or equal to the loop invariant, then
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
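// Editor's note: each IVis* helper above accepts either operand order.
// Assuming %i is in IVBasedValues and %n is loop invariant:
//   IVisLT(icmp slt %i, %n)  --> returns %n    (IV on the left)
//   IVisLT(icmp sgt %n, %i)  --> returns %n    (same fact, operands swapped)
//   IVisLT(icmp sle %i, %n)  --> returns NULL  (that case belongs to IVisLE)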
-
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopRotation.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(&ID) {}
+ LoopRotate() : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ }
// Rotate Loop L as many times as possible. Return true if
// loop is rotated at least once.
@@ -79,7 +81,11 @@
}
char LoopRotate::ID = 0;
-INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
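// Editor's sketch of the registration idiom this patch migrates to. MyPass is
// hypothetical; initializeMyPassPass is the symbol the INITIALIZE_PASS_BEGIN/
// INITIALIZE_PASS_END macros define (normally declared in InitializePasses.h):
//
//   MyPass() : LoopPass(ID) {   // note: ID by reference, no longer &ID
//     initializeMyPassPass(*PassRegistry::getPassRegistry());
//   }
//   char MyPass::ID = 0;
//   INITIALIZE_PASS_BEGIN(MyPass, "my-pass", "My Pass", false, false)
//   INITIALIZE_PASS_DEPENDENCY(LoopInfo)
//   INITIALIZE_PASS_END(MyPass, "my-pass", "My Pass", false, false)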
Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
@@ -143,11 +149,11 @@
// FIXME: Use common api to estimate size.
for (BasicBlock::const_iterator OI = OrigHeader->begin(),
OE = OrigHeader->end(); OI != OE; ++OI) {
- if (isa<PHINode>(OI))
- continue; // PHI nodes don't count.
- if (isa<DbgInfoIntrinsic>(OI))
- continue; // Debug intrinsics don't count as size.
- ++Size;
+ if (isa<PHINode>(OI))
+ continue; // PHI nodes don't count.
+ if (isa<DbgInfoIntrinsic>(OI))
+ continue; // Debug intrinsics don't count as size.
+ ++Size;
}
if (Size > MAX_HEADER_SIZE)
@@ -187,13 +193,30 @@
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
- // For the rest of the instructions, create a clone in the OldPreHeader.
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
- for (; I != E; ++I) {
- Instruction *C = I->clone();
- C->setName(I->getName());
+ while (I != E) {
+ Instruction *Inst = I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) &&
+ !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+ C->setName(Inst->getName());
C->insertBefore(LoopEntryBranch);
- ValueMap[I] = C;
+ ValueMap[Inst] = C;
}
// Along with all the other instructions, we just cloned OrigHeader's
@@ -221,7 +244,7 @@
// The value now exits in two versions: the initial value in the preheader
// and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal);
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
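// Editor's sketch of the updated SSAUpdater API used above; the block and
// value names are hypothetical:
//
//   SSAUpdater SSA;
//   SSA.Initialize(V->getType(), V->getName()); // was: SSA.Initialize(V)
//   SSA.AddAvailableValue(PreheaderBB, InitialV);
//   SSA.AddAvailableValue(HeaderBB, LoopV);
//   Value *Live = SSA.GetValueInMiddleOfBlock(UseBB); // inserts PHIs as needed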
@@ -261,6 +284,26 @@
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
+ // Move the original header to the bottom of the loop, where it now more
+ // naturally belongs. This isn't necessary for correctness, and CodeGen can
+ // usually reorder blocks on its own to fix things like this up, but it's
+ // still nice to keep the IR readable.
+ //
+ // The original header should have only one predecessor at this point, since
+ // we checked that the loop had a proper preheader and unique backedge before
+ // we started.
+ assert(OrigHeader->getSinglePredecessor() &&
+ "Original loop header has too many predecessors after loop rotation!");
+ OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
+
+ // Also, since this original header only has one predecessor, zap its
+ // PHI nodes, which are now trivial.
+ FoldSingleEntryPHINodes(OrigHeader);
+
+ // TODO: We could just go ahead and merge OrigHeader into its predecessor
+ // at this point, if we don't mind updating dominator info.
+
+ // Establish a new preheader, update dominators, etc.
preserveCanonicalLoopForm(LPM);
++NumRotated;
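A compact restatement of the hoisting criterion introduced in the LoopRotation
hunk above, as a sketch (canHoistIntoPreheader is a hypothetical name, not a
function added by this patch):

  static bool canHoistIntoPreheader(Instruction *Inst, Loop *L) {
    // Invariant operands, no memory access, not a terminator: moving Inst to
    // the preheader preserves its first-iteration execution, so even a
    // potentially trapping instruction is safe to move, while anything that
    // reads memory is not (the loop might write to it).
    return L->hasLoopInvariantOperands(Inst) &&
           !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
           !isa<TerminatorInst>(Inst);
  }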
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue Oct 26 19:48:03 2010
@@ -113,7 +113,7 @@
public:
void CountRegister(const SCEV *Reg, size_t LUIdx);
void DropRegister(const SCEV *Reg, size_t LUIdx);
- void DropUse(size_t LUIdx);
+ void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
@@ -152,18 +152,27 @@
}
void
-RegUseTracker::DropUse(size_t LUIdx) {
- // Remove the use index from every register's use list.
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+ assert(LUIdx <= LastLUIdx);
+
+ // Update RegUses. The data structure is not optimized for this purpose;
+ // we must iterate through it and update each of the bit vectors.
for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
- I != E; ++I)
- I->second.UsedByIndices.reset(LUIdx);
+ I != E; ++I) {
+ SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ if (LUIdx < UsedByIndices.size())
+ UsedByIndices[LUIdx] =
+ LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+ UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+ }
}
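// Editor's sketch of the swap-with-last deletion this method supports; the
// caller swaps the dying use with the last one and pops, so bookkeeping that
// referenced the old last index must be redirected to LUIdx, which is what
// the bit-vector update above does:
//
//   std::swap(Uses[LUIdx], Uses.back());
//   Uses.pop_back();
//   RegUses.SwapAndDropUse(LUIdx, Uses.size()); // Uses.size() == old last idx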
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
- if (!RegUsesMap.count(Reg)) return false;
- const SmallBitVector &UsedByIndices =
- RegUsesMap.find(Reg)->second.UsedByIndices;
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
@@ -441,12 +450,12 @@
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
- const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
- IgnoreSignificantBits);
- if (!Start) return 0;
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
return 0;
@@ -505,12 +514,14 @@
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -528,12 +539,14 @@
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -603,7 +616,7 @@
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+ Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -640,8 +653,6 @@
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
SetupCost(0) {}
- unsigned getNumRegs() const { return NumRegs; }
-
bool operator<(const Cost &Other) const;
void Loose();
@@ -717,6 +728,9 @@
(isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
+
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ Reg->hasComputableLoopEvolution(L);
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
@@ -751,9 +765,6 @@
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
-
- NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
- BaseReg->hasComputableLoopEvolution(L);
}
if (F.BaseRegs.size() > 1)
@@ -990,8 +1001,6 @@
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
- void check() const;
-
void print(raw_ostream &OS) const;
void dump() const;
};
@@ -1083,7 +1092,7 @@
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << ',';
}
OS << '}';
@@ -1254,32 +1263,6 @@
}
};
-/// FormulaSorter - This class implements an ordering for formulae which sorts
-/// them by their standalone cost.
-class FormulaSorter {
- /// These two sets are kept empty, so that we compute standalone costs.
- DenseSet<const SCEV *> VisitedRegs;
- SmallPtrSet<const SCEV *, 16> Regs;
- Loop *L;
- LSRUse *LU;
- ScalarEvolution &SE;
- DominatorTree &DT;
-
-public:
- FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
- : L(l), LU(&lu), SE(se), DT(dt) {}
-
- bool operator()(const Formula &A, const Formula &B) {
- Cost CostA;
- CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
- Regs.clear();
- Cost CostB;
- CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
- Regs.clear();
- return CostA < CostB;
- }
-};
-
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
@@ -1338,7 +1321,7 @@
LSRUse::KindType Kind,
const Type *AccessTy);
- void DeleteUse(LSRUse &LU);
+ void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
@@ -1364,6 +1347,10 @@
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1597,7 +1584,7 @@
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// Add one to the backedge-taken count to get the trip count.
- const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Check for a max calculation that matches the pattern. There's no check
@@ -1918,10 +1905,13 @@
}
/// DeleteUse - Delete the given use from the Uses list.
-void LSRInstance::DeleteUse(LSRUse &LU) {
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
+
+ // Update RegUses.
+ RegUses.SwapAndDropUse(LUIdx, Uses.size());
}
/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
@@ -1929,33 +1919,41 @@
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
- // Search all uses for the formula. This could be more clever. Ignore
- // ICmpZero uses because they may contain formulae generated by
- // GenerateICmpZeroScales, in which case adding fixup offsets may
- // be invalid.
+ // Search all uses for the formula. This could be more clever.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
if (&LU != &OrigLU &&
LU.Kind != LSRUse::ICmpZero &&
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
- LU.Kind) {
+ F.AM.Scale == OrigF.AM.Scale) {
if (F.AM.BaseOffs == 0)
return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+ // can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
}
}
+ // Nothing looked good.
return 0;
}
@@ -1987,7 +1985,7 @@
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- next(I); NewStrideIter != E; ++NewStrideIter) {
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
@@ -2226,14 +2224,13 @@
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
- SmallVectorImpl<const SCEV *> &UninterestingOps,
const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
+ CollectSubexprs(*I, C, Ops, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
@@ -2241,8 +2238,8 @@
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
- C, Ops, UninterestingOps, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
+ C, Ops, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2252,17 +2249,13 @@
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, UninterestingOps, L, SE);
+ Ops, L, SE);
return;
}
}
- // Otherwise use the value itself. Loop-variant "unknown" values are
- // uninteresting; we won't be able to do anything meaningful with them.
- if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
- UninterestingOps.push_back(S);
- else
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ // Otherwise use the value itself, optionally with a scale applied.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2276,19 +2269,19 @@
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
- SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
- CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
-
- // Add any uninteresting values as one register, as we won't be able to
- // form any interesting reassociation opportunities with them. They'll
- // just have to be added inside the loop no matter what we do.
- if (!UninterestingAddOps.empty())
- AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, L, SE);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ continue;
+
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
@@ -2298,9 +2291,9 @@
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps
- ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append
- (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -2396,7 +2389,7 @@
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
// Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
std::swap(F.BaseRegs[i], F.BaseRegs.back());
@@ -2797,11 +2790,17 @@
}
GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
}
-/// If their are multiple formulae with the same set of registers used
+/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2814,7 +2813,6 @@
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
- FormulaSorter Sorter(L, LU, SE, DT);
DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
bool Any = false;
@@ -2840,7 +2838,14 @@
BestFormulae.insert(std::make_pair(Key, FIdx));
if (!P.second) {
Formula &Best = LU.Formulae[P.first->second];
- if (Sorter.operator()(F, Best))
+
+ Cost CostF;
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ Cost CostBest;
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
@@ -2880,7 +2885,7 @@
/// this many solutions because it prune the search space, but the pruning
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
- uint32_t Power = 1;
+ size_t Power = 1;
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
size_t FSize = I->Formulae.size();
@@ -2895,11 +2900,11 @@
return Power;
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
-void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2957,7 +2962,12 @@
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2982,6 +2992,28 @@
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.AM.BaseOffs;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
+ }
+ DEBUG(dbgs() << "New fixup has offset "
+ << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -3000,22 +3032,8 @@
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
- // Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.AM.BaseOffs;
- DEBUG(dbgs() << "New fixup has offset "
- << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
- }
-
// Delete the old use.
- DeleteUse(LU);
+ DeleteUse(LU, LUIdx);
--LUIdx;
--NumUses;
break;
@@ -3028,7 +3046,30 @@
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
SmallPtrSet<const SCEV *, 4> Taken;
@@ -3090,6 +3131,17 @@
}
}
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
/// SolveRecurse - This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
@@ -3633,10 +3685,6 @@
// to formulate the values needed for the uses.
GenerateAllReuseFormulae();
- DEBUG(dbgs() << "\n"
- "After generating reuse formulae:\n";
- print_uses(dbgs()));
-
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
@@ -3743,15 +3791,25 @@
}
char LoopStrengthReduce::ID = 0;
-INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
- "Loop Strength Reduction", false, false);
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(&ID), TLI(tli) {}
+ : LoopPass(ID), TLI(tli) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+ }
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnrollPass.cpp Tue Oct 26 19:48:03 2010
@@ -27,7 +27,7 @@
using namespace llvm;
static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
static cl::opt<unsigned>
@@ -43,12 +43,20 @@
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
+ LoopUnroll() : LoopPass(ID) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
+
+ // Threshold to use when optsize is specified (and there is no
+ // explicit -unroll-threshold).
+ static const unsigned OptSizeUnrollThreshold = 50;
+
+ unsigned CurrentThreshold;
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -62,19 +70,22 @@
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolution>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- AU.addPreserved<ScalarEvolution>();
}
};
}
char LoopUnroll::ID = 0;
-INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
@@ -84,17 +95,50 @@
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I);
- NumCalls = Metrics.NumCalls;
- return Metrics.NumInsts;
+ NumCalls = Metrics.NumInlineCandidates;
+
+ unsigned LoopSize = Metrics.NumInsts;
+
+ // If we can identify the induction variable, we know that it will become
+ // constant when we unroll the loop, so factor that into our loop size
+ // estimate.
+ // FIXME: We have to divide by InlineConstants::InstrCost because the
+ // measure returned by CountCodeReductionForConstant is not an instruction
+ // count, but rather a weight as defined by InlineConstants. It would
+ // probably be a good idea to standardize on a single weighting scheme by
+ // pushing more of the logic for weighting into CodeMetrics.
+ if (PHINode *IndVar = L->getCanonicalInductionVariable()) {
+ unsigned SizeDecrease = Metrics.CountCodeReductionForConstant(IndVar);
+ // NOTE: Because SizeDecrease is a fuzzy estimate, we don't want to allow
+ // it to totally negate the cost of unrolling a loop.
+ SizeDecrease = SizeDecrease > LoopSize / 2 ? LoopSize / 2 : SizeDecrease;
+ }
+
+ // Don't allow an estimate of size zero. This would allow unrolling of loops
+ // with huge iteration counts, which is a compile time problem even if it's
+ // not a problem for code quality.
+ if (LoopSize == 0) LoopSize = 1;
+
+ return LoopSize;
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+
LoopInfo *LI = &getAnalysis<LoopInfo>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
+
+ // Determine the current unrolling threshold. While this is normally set
+ // from UnrollThreshold, it is overridden to a smaller value if the current
+ // function is marked as optimize-for-size, and the unroll threshold was
+ // not user specified.
+ CurrentThreshold = UnrollThreshold;
+ if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
+ UnrollThreshold.getNumOccurrences() == 0)
+ CurrentThreshold = OptSizeUnrollThreshold;
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
@@ -112,25 +156,25 @@
}
// Enforce the threshold.
- if (UnrollThreshold != NoThreshold) {
- unsigned NumCalls;
- unsigned LoopSize = ApproximateLoopSize(L, NumCalls);
+ if (CurrentThreshold != NoThreshold) {
+ unsigned NumInlineCandidates;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
- if (NumCalls != 0) {
- DEBUG(dbgs() << " Not unrolling loop with function calls.\n");
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > UnrollThreshold) {
+ if (TripCount != 1 && Size > CurrentThreshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n");
+ << " because size: " << Size << ">" << CurrentThreshold << "\n");
if (!UnrollAllowPartial) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
// Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
+ Count = CurrentThreshold / LoopSize;
while (Count != 0 && TripCount%Count != 0) {
Count--;
}
@@ -148,12 +192,7 @@
return false;
// FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
return true;
}
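
To make the partial-unrolling arithmetic above concrete, here is a stand-alone
C++ sketch of the same count selection, with plain integers standing in for
the pass's state (the function name and sample numbers are illustrative, not
from the patch):

#include <iostream>

// Sketch of the count selection in LoopUnroll::runOnLoop: start from the
// largest count whose unrolled size fits the threshold, then shrink it
// until it evenly divides the trip count (0 means "give up").
unsigned pickPartialUnrollCount(unsigned TripCount, unsigned LoopSize,
                                unsigned Threshold) {
  unsigned Count = Threshold / LoopSize;
  while (Count != 0 && TripCount % Count != 0)
    --Count;
  return Count;
}

int main() {
  // A 100-iteration loop of 12 instructions under a threshold of 100:
  // 8, 7 and 6 don't divide 100, so the loop is unrolled by 5.
  std::cout << pickPartialUnrollCount(100, 12, 100) << "\n";
}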
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/LoopUnswitch.cpp Tue Oct 26 19:48:03 2010
@@ -77,7 +77,6 @@
bool redoLoop;
Loop *currentLoop;
- DominanceFrontier *DF;
DominatorTree *DT;
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
@@ -92,15 +91,17 @@
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
- LoopPass(&ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
- loopPreheader(NULL) {}
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
+ loopPreheader(NULL) {
+ initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+ }
bool runOnLoop(Loop *L, LPPassManager &LPM);
bool processCurrentLoop();
/// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
+ /// loop preheaders be inserted into the CFG.
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
@@ -110,7 +111,6 @@
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
private:
@@ -160,7 +160,13 @@
};
}
char LoopUnswitch::ID = 0;
-INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
@@ -201,7 +207,6 @@
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;
- DF = getAnalysisIfAvailable<DominanceFrontier>();
DT = getAnalysisIfAvailable<DominatorTree>();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
@@ -216,8 +221,6 @@
// FIXME: Reconstruct dom info, because it is not preserved properly.
if (DT)
DT->runOnFunction(*F);
- if (DF)
- DF->runOnFunction(*F);
}
return Changed;
}
@@ -282,19 +285,18 @@
return Changed;
}
-/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
-/// 1. Exit the loop with no side effects.
-/// 2. Branch to the latch block with no side-effects.
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (a possible infinite loop counts as a side
+/// effect).
///
-/// If these conditions are true, we return true and set ExitBB to the block we
+/// If so, we return true and set ExitBB to the block we
/// exit through.
///
static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
BasicBlock *&ExitBB,
std::set<BasicBlock*> &Visited) {
if (!Visited.insert(BB).second) {
- // Already visited and Ok, end of recursion.
- return true;
+    // Already visited. Without more analysis, this could indicate an
+    // infinite loop.
+ return false;
} else if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
@@ -324,7 +326,7 @@
/// process. If so, return the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
- Visited.insert(L->getHeader()); // Branches to header are ok.
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
BasicBlock *ExitBB = 0;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
@@ -356,8 +358,8 @@
if (!BI->isConditional() || BI->getCondition() != Cond)
return false;
- // Check to see if a successor of the branch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
@@ -460,10 +462,10 @@
// current values into those specified by VMap.
//
static inline void RemapInstruction(Instruction *I,
- ValueMap<const Value *, Value*> &VMap) {
+ ValueToValueMapTy &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
if (It != VMap.end()) Op = It->second;
I->setOperand(op, Op);
}
@@ -471,7 +473,7 @@
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
Loop *New = new Loop();
LPM->insertLoop(New, PL);
@@ -615,7 +617,7 @@
// the loop preheader and exit blocks), keeping track of the mapping between
// the instructions and blocks.
NewBlocks.reserve(LoopBlocks.size());
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
NewBlocks.push_back(NewBB);
@@ -653,7 +655,7 @@
for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
PN = cast<PHINode>(I);
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+ ValueToValueMapTy::iterator It = VMap.find(V);
if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
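
The ValueMap-to-ValueToValueMapTy change above is mechanical, but the remap
loop itself is worth seeing in isolation: after cloning, every operand that
has an entry in the map must be redirected to its clone. A minimal
stand-alone mirror of RemapInstruction over a plain std::map (hypothetical
names, no LLVM types):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Sketch of operand remapping after cloning: any operand with an entry in
// the value map is replaced by its clone; unmapped operands stay as-is.
void remapOperands(std::vector<std::string> &Operands,
                   const std::map<std::string, std::string> &VMap) {
  for (std::string &Op : Operands) {
    auto It = VMap.find(Op);
    if (It != VMap.end())
      Op = It->second;
  }
}

int main() {
  std::vector<std::string> Ops = {"%x", "%y", "%const"};
  std::map<std::string, std::string> VMap = {{"%x", "%x.us"},
                                             {"%y", "%y.us"}};
  remapOperands(Ops, VMap);
  for (const std::string &Op : Ops)
    std::cout << Op << " ";   // Prints: %x.us %y.us %const
  std::cout << "\n";
}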
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp Tue Oct 26 19:48:03 2010
@@ -304,7 +304,9 @@
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass(&ID) {}
+ MemCpyOpt() : FunctionPass(ID) {
+ initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ }
private:
// This transformation requires dominator postdominator info
@@ -321,7 +323,8 @@
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M);
bool processMemMove(MemMoveInst *M);
- bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+ bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C);
bool iterateOnFunction(Function &F);
};
@@ -331,9 +334,13 @@
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
-INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
-
-
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
@@ -342,6 +349,37 @@
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (SI->isVolatile()) return false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (!LI->isVolatile() && LI->hasOneUse()) {
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ MemDepResult dep = MD.getDependency(LI);
+ CallInst *C = 0;
+ if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+ C = dyn_cast<CallInst>(dep.getInst());
+
+ if (C) {
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ if (changed) {
+ MD.removeInstruction(SI);
+ SI->eraseFromParent();
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
LLVMContext &Context = SI->getContext();
// There are two cases that are interesting for this code to handle: memcpy
@@ -354,8 +392,6 @@
if (!ByteVal)
return false;
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
Module *M = SI->getParent()->getParent()->getParent();
@@ -489,7 +525,9 @@
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+ Value *cpyDest, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -506,16 +544,8 @@
// Deliberately get the source and destination with bitcasts stripped away,
// because we'll need to do type comparisons based on the underlying type.
- Value *cpyDest = cpy->getDest();
- Value *cpySrc = cpy->getSource();
CallSite CS(C);
- // We need to be able to reason about the size of the memcpy, so we require
- // that it be a constant.
- ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
- if (!cpyLength)
- return false;
-
// Require that src be an alloca. This simplifies the reasoning considerably.
AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
@@ -532,7 +562,7 @@
uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
- if (cpyLength->getZExtValue() < srcSize)
+ if (cpyLen < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
@@ -601,7 +631,7 @@
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
+ if (AA.getModRefInfo(C, cpyDest, srcSize) !=
AliasAnalysis::NoModRef)
return false;
@@ -630,7 +660,6 @@
// Remove the memcpy
MD.removeInstruction(cpy);
- cpy->eraseFromParent();
++NumMemCpyInstr;
return true;
@@ -644,6 +673,10 @@
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
+  // We can only optimize statically-sized memcpy calls.
+ ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!cpyLen) return false;
+
  // There are two possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
// b) call-memcpy xform for return slot optimization.
@@ -651,8 +684,12 @@
if (!dep.isClobber())
return false;
if (!isa<MemCpyInst>(dep.getInst())) {
- if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
- return performCallSlotOptzn(M, C);
+ if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
+ bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ cpyLen->getZExtValue(), C);
+ if (changed) M->eraseFromParent();
+ return changed;
+ }
return false;
}
@@ -697,14 +734,21 @@
M->getParent()->getParent()->getParent(),
M->getIntrinsicID(), ArgTys, 3);
+ // Make sure to use the lesser of the alignment of the source and the dest
+ // since we're changing where we're reading from, but don't want to increase
+ // the alignment past what can be read from or written to.
+ // TODO: Is this worth it if we're creating a less aligned memcpy? For
+ // example we could be moving from movaps -> movq on x86.
+ unsigned Align = std::min(MDep->getAlignmentCst()->getZExtValue(),
+ M->getAlignmentCst()->getZExtValue());
+ LLVMContext &Context = M->getContext();
+ ConstantInt *AlignCI = ConstantInt::get(Type::getInt32Ty(Context), Align);
Value *Args[5] = {
M->getRawDest(), MDep->getRawSource(), M->getLength(),
- M->getAlignmentCst(), M->getVolatileCst()
+ AlignCI, M->getVolatileCst()
};
-
CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
-
// If C and M don't interfere, then this is a valid transformation. If they
// did, this would mean that the two sources overlap, which would be bad.
if (MD.getDependency(C) == dep) {
@@ -728,7 +772,7 @@
// If the memmove is a constant size, use it for the alias query, this allows
// us to optimize things like: memmove(P, P+64, 64);
- uint64_t MemMoveSize = ~0ULL;
+ uint64_t MemMoveSize = AliasAnalysis::UnknownSize;
if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
MemMoveSize = Len->getZExtValue();
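
The call slot optimization that performCallSlotOptzn now also reaches through
load/store pairs is easiest to see at the source level. A hedged C++ sketch
of the before/after shape (struct S, make_s and Dest are hypothetical; the
real transformation happens on IR and is guarded by the alloca-size and alias
checks shown above):

#include <cstring>

struct S { char Buf[64]; };

void make_s(S *Out);  // Hypothetical callee that writes its result via Out.

void before(S &Dest) {
  S Tmp;
  make_s(&Tmp);                         // Call writes its result into Tmp.
  std::memcpy(&Dest, &Tmp, sizeof(S));  // Then Tmp is copied into Dest.
}

void after(S &Dest) {
  // Valid when Tmp is a local at least as large as the copy and alias
  // analysis proves the callee cannot otherwise touch Dest:
  make_s(&Dest);                        // Call writes straight into Dest.
}

The new code in processStore handles the same pattern when the copy is
spelled as a load followed by a store instead of a memcpy.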
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Reassociate.cpp Tue Oct 26 19:48:03 2010
@@ -77,7 +77,9 @@
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
- Reassociate() : FunctionPass(&ID) {}
+ Reassociate() : FunctionPass(ID) {
+ initializeReassociatePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -104,7 +106,7 @@
char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
- "Reassociate expressions", false, false);
+ "Reassociate expressions", false, false)
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Reg2Mem.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
namespace {
struct RegToMem : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- RegToMem() : FunctionPass(&ID) {}
+ RegToMem() : FunctionPass(ID) {
+ initializeRegToMemPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,9 +61,11 @@
}
char RegToMem::ID = 0;
-static RegisterPass<RegToMem>
-X("reg2mem", "Demote all values to stack slots");
-
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
bool RegToMem::runOnFunction(Function &F) {
if (F.isDeclaration())
@@ -124,7 +128,7 @@
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
//
-const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
return new RegToMem();
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SCCP.cpp Tue Oct 26 19:48:03 2010
@@ -275,12 +275,12 @@
return I->second;
}
- LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
StructValueState.find(std::make_pair(V, i));
assert(I != StructValueState.end() && "V is not in valuemap!");
return I->second;
- }
+ }*/
/// getTrackedRetVals - Get the inferred return value map.
///
@@ -518,7 +518,6 @@
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
- void visitVANextInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
void visitInstruction(Instruction &I) {
@@ -1586,7 +1585,9 @@
///
struct SCCP : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- SCCP() : FunctionPass(&ID) {}
+ SCCP() : FunctionPass(ID) {
+ initializeSCCPPass(*PassRegistry::getPassRegistry());
+ }
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
@@ -1601,7 +1602,7 @@
char SCCP::ID = 0;
INITIALIZE_PASS(SCCP, "sccp",
- "Sparse Conditional Constant Propagation", false, false);
+ "Sparse Conditional Constant Propagation", false, false)
// createSCCPPass - This is the public interface to this file.
FunctionPass *llvm::createSCCPPass() {
@@ -1702,7 +1703,9 @@
///
struct IPSCCP : public ModulePass {
static char ID;
- IPSCCP() : ModulePass(&ID) {}
+ IPSCCP() : ModulePass(ID) {
+ initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
};
} // end anonymous namespace
@@ -1710,7 +1713,7 @@
char IPSCCP::ID = 0;
INITIALIZE_PASS(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
- false, false);
+ false, false)
// createIPSCCPPass - This is the public interface to this file.
ModulePass *llvm::createIPSCCPPass() {
@@ -1749,6 +1752,13 @@
bool IPSCCP::runOnModule(Module &M) {
SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+  // address-taken-ness. Because of this, we record the answer from the
+  // first pass so the later simplification pass can rely on it.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
@@ -1764,9 +1774,13 @@
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
- if (F->hasLocalLinkage() && !AddressIsTaken(F)) {
- Solver.AddArgumentTrackedFunction(F);
- continue;
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
}
// Assume the function is called.
@@ -1951,7 +1965,7 @@
continue;
// We can only do this if we know that nothing else can call the function.
- if (!F->hasLocalLinkage() || AddressIsTaken(F))
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
continue;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
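
The AddressTakenFunctions set above fixes an ordering hazard: IPSCCP's own
simplifications can strip a function's last address-taking use, so a query
made after solving would see different facts than the solver assumed. Caching
the pre-pass answer keeps both phases consistent. A tiny model of the hazard
(plain C++, hypothetical names):

#include <iostream>
#include <set>
#include <string>

// Model: record address-taken functions before "simplification", then make
// the signature-rewrite decision against that snapshot, not live state.
int main() {
  std::set<std::string> AddressTakenAtStart = {"callback"};

  // ...the solver runs and happens to delete the last use of &callback...
  bool AddressTakenNow = false;

  // Deciding with the live answer would wrongly permit rewriting the
  // signature of a function whose address escaped in the original module:
  std::cout << (AddressTakenNow ? "keep" : "rewrite") << "\n";  // wrong
  std::cout << (AddressTakenAtStart.count("callback") ? "keep" : "rewrite")
            << "\n";                                            // right
}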
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Scalar.cpp Tue Oct 26 19:48:03 2010
@@ -7,12 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMScalarOpts.a, which implements
-// several scalar transformations over the LLVM intermediate representation.
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
//
//===----------------------------------------------------------------------===//
#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
@@ -20,6 +23,46 @@
using namespace llvm;
+/// initializeScalarOpts - Initialize all passes linked into the
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+ initializeADCEPass(Registry);
+ initializeBlockPlacementPass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeConstantPropagationPass(Registry);
+ initializeCorrelatedValuePropagationPass(Registry);
+ initializeDCEPass(Registry);
+ initializeDeadInstEliminationPass(Registry);
+ initializeDSEPass(Registry);
+ initializeGEPSplitterPass(Registry);
+ initializeGVNPass(Registry);
+ initializeIndVarSimplifyPass(Registry);
+ initializeJumpThreadingPass(Registry);
+ initializeLICMPass(Registry);
+ initializeLoopDeletionPass(Registry);
+ initializeLoopRotatePass(Registry);
+ initializeLoopStrengthReducePass(Registry);
+ initializeLoopUnrollPass(Registry);
+ initializeLoopUnswitchPass(Registry);
+ initializeLowerAtomicPass(Registry);
+ initializeMemCpyOptPass(Registry);
+ initializeReassociatePass(Registry);
+ initializeRegToMemPass(Registry);
+ initializeSCCPPass(Registry);
+ initializeIPSCCPPass(Registry);
+ initializeSROAPass(Registry);
+ initializeCFGSimplifyPassPass(Registry);
+ initializeSimplifyHalfPowrLibCallsPass(Registry);
+ initializeSimplifyLibCallsPass(Registry);
+ initializeSinkingPass(Registry);
+ initializeTailDupPass(Registry);
+ initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+ initializeScalarOpts(*unwrap(R));
+}
+
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
@@ -56,10 +99,6 @@
unwrap(PM)->add(createLoopDeletionPass());
}
-void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopIndexSplitPass());
-}
-
void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
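
Most of this patch replaces one-shot INITIALIZE_PASS macros with the
BEGIN/DEPENDENCY/END form plus the initializeScalarOpts entry point. A toy
model of what that buys, in plain C++ (a sketch of the idea, not LLVM's
PassRegistry):

#include <set>
#include <string>

// Each pass's init function registers its dependencies first and itself
// exactly once, so a library-wide initializer (or any caller) can run the
// init functions in any order, repeatedly, without double registration.
struct Registry {
  std::set<std::string> Registered;
  bool registerPass(const std::string &Name) {
    return Registered.insert(Name).second;  // True only on the first call.
  }
};

void initializeLoopSimplify(Registry &R) { R.registerPass("loop-simplify"); }
void initializeLCSSA(Registry &R) { R.registerPass("lcssa"); }

void initializeLoopUnroll(Registry &R) {
  initializeLoopSimplify(R);      // Mirrors INITIALIZE_PASS_DEPENDENCY lines.
  initializeLCSSA(R);
  R.registerPass("loop-unroll");  // Mirrors INITIALIZE_PASS_END.
}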
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Oct 26 19:48:03 2010
@@ -28,6 +28,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
@@ -51,7 +52,8 @@
namespace {
struct SROA : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- explicit SROA(signed T = -1) : FunctionPass(&ID) {
+ explicit SROA(signed T = -1) : FunctionPass(ID) {
+ initializeSROAPass(*PassRegistry::getPassRegistry());
if (T == -1)
SRThreshold = 128;
else
@@ -114,8 +116,7 @@
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
- AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
-
+
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -135,8 +136,12 @@
}
char SROA::ID = 0;
-INITIALIZE_PASS(SROA, "scalarrepl",
- "Scalar Replacement of Aggregates", false, false);
+INITIALIZE_PASS_BEGIN(SROA, "scalarrepl",
+ "Scalar Replacement of Aggregates", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(SROA, "scalarrepl",
+ "Scalar Replacement of Aggregates", false, false)
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
@@ -194,6 +199,27 @@
};
} // end anonymous namespace.
+
+/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
+/// allowed to form. We do this to avoid MMX types, which is a complete hack,
+/// but is required until the backend is fixed.
+static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
+ StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
+ if (!Triple.startswith("i386") &&
+ !Triple.startswith("x86_64"))
+ return false;
+
+ // Reject all the MMX vector types.
+ switch (VTy->getNumElements()) {
+ default: return false;
+ case 1: return VTy->getElementType()->isIntegerTy(64);
+ case 2: return VTy->getElementType()->isIntegerTy(32);
+ case 4: return VTy->getElementType()->isIntegerTy(16);
+ case 8: return VTy->getElementType()->isIntegerTy(8);
+ }
+}
+
+
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
@@ -210,7 +236,8 @@
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
+ !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
@@ -299,6 +326,9 @@
// Don't break volatile loads.
if (LI->isVolatile())
return false;
+ // Don't touch MMX operations.
+ if (LI->getType()->isX86_MMXTy())
+ return false;
MergeInType(LI->getType(), Offset);
continue;
}
@@ -306,6 +336,9 @@
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ // Don't touch MMX operations.
+ if (SI->getOperand(0)->getType()->isX86_MMXTy())
+ return false;
MergeInType(SI->getOperand(0)->getType(), Offset);
continue;
}
@@ -1414,7 +1447,7 @@
Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
OtherPTy->getAddressSpace());
OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
- OtherElt->getNameStr(), MI);
+ OtherElt->getName(), MI);
}
}
@@ -1663,6 +1696,12 @@
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ return HasPadding(ATy->getElementType(), TD);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return HasPadding(VTy->getElementType(), TD);
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
@@ -1692,12 +1731,8 @@
if (PrevFieldEnd < SL->getSizeInBits())
return true;
}
-
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- return HasPadding(ATy->getElementType(), TD);
- } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- return HasPadding(VTy->getElementType(), TD);
}
+
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
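
The reshuffled HasPadding logic above hinges on one final check: a type is
padded if its sizes don't add up. The same idea can be demonstrated with host
C++ types; layout is ABI-dependent, so treat the numbers as typical rather
than guaranteed, and note this only illustrates the spirit of the
TD.getTypeSizeInBits != TD.getTypeAllocSizeInBits comparison:

#include <iostream>

// A struct "has padding" when its members' sizes don't add up to sizeof(T),
// i.e. the compiler inserted alignment gaps.
struct NoPad { int A; int B; };   // 4 + 4 == sizeof(NoPad) on most ABIs.
struct Pad   { char C; int A; };  // 1 + 4 <  sizeof(Pad): 3 gap bytes.

int main() {
  std::cout << (sizeof(int) + sizeof(int) != sizeof(NoPad)) << "\n";   // 0
  std::cout << (sizeof(char) + sizeof(int) != sizeof(Pad)) << "\n";    // 1
}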
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp Tue Oct 26 19:48:03 2010
@@ -42,7 +42,9 @@
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(&ID) {}
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
};
@@ -50,7 +52,7 @@
char CFGSimplifyPass::ID = 0;
INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
- "Simplify the CFG", false, false);
+ "Simplify the CFG", false, false)
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
@@ -285,10 +287,9 @@
while (LocalChange) {
LocalChange = false;
- // Loop over all of the basic blocks (except the first one) and remove them
- // if they are unneeded...
+ // Loop over all of the basic blocks and remove them if they are unneeded...
//
- for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (SimplifyCFG(BBIt++, TD)) {
LocalChange = true;
++NumSimpl;
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp Tue Oct 26 19:48:03 2010
@@ -32,7 +32,9 @@
const TargetData *TD;
public:
static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
+ initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -47,7 +49,7 @@
} // end anonymous namespace.
INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
- "Simplify half_powr library calls", false, false);
+ "Simplify half_powr library calls", false, false)
// Public interface to the Simplify HalfPowr LibCalls pass.
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp Tue Oct 26 19:48:03 2010
@@ -223,7 +223,8 @@
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
- FT->getParamType(0) != FT->getReturnType())
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
return 0;
Value *SrcStr = CI->getArgOperand(0);
@@ -252,26 +253,61 @@
// strchr can find the nul character.
Str += '\0';
- char CharValue = CharC->getSExtValue();
// Compute the offset.
- uint64_t i = 0;
- while (1) {
- if (i == Str.size()) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
- // Did we find our match?
- if (Str[i] == CharValue)
- break;
- ++i;
- }
+ size_t I = Str.find(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
return B.CreateGEP(SrcStr, Idx, "strchr");
}
};
//===---------------------------------------===//
+// 'strrchr' Optimizations
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD);
+ return 0;
+ }
+
+ // strrchr can find the nul character.
+ Str += '\0';
+
+ // Compute the offset.
+ size_t I = Str.rfind(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
+ return B.CreateGEP(SrcStr, Idx, "strrchr");
+ }
+};
+
+//===---------------------------------------===//
// 'strcmp' Optimizations
struct StrCmpOpt : public LibCallOptimization {
@@ -488,6 +524,46 @@
}
};
+
+//===---------------------------------------===//
+// 'strpbrk' Optimizations
+
+struct StrPBrkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> NULL
+ // strpbrk("", s) -> NULL
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == std::string::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
+ return B.CreateGEP(CI->getArgOperand(0), Idx, "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (TD && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+
+ return 0;
+ }
+};
+
//===---------------------------------------===//
// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
@@ -510,6 +586,67 @@
};
//===---------------------------------------===//
+// 'strspn' Optimizations
+
+struct StrSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strcspn' Optimizations
+
+struct StrCSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+
+ // strcspn(s, "") -> strlen(s)
+ if (TD && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, TD);
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
// 'strstr' Optimizations
struct StrStrOpt : public LibCallOptimization {
@@ -1220,10 +1357,10 @@
class SimplifyLibCalls : public FunctionPass {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
- StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
- StrNCpyOpt StrNCpy; StrLenOpt StrLen;
- StrToOpt StrTo; StrStrOpt StrStr;
+ StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+ StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
@@ -1237,7 +1374,9 @@
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {}
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+ initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+ }
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1255,7 +1394,7 @@
} // end anonymous namespace.
INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
- "Simplify well-known library calls", false, false);
+ "Simplify well-known library calls", false, false)
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -1269,11 +1408,13 @@
Optimizations["strcat"] = &StrCat;
Optimizations["strncat"] = &StrNCat;
Optimizations["strchr"] = &StrChr;
+ Optimizations["strrchr"] = &StrRChr;
Optimizations["strcmp"] = &StrCmp;
Optimizations["strncmp"] = &StrNCmp;
Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
Optimizations["strlen"] = &StrLen;
+ Optimizations["strpbrk"] = &StrPBrk;
Optimizations["strtol"] = &StrTo;
Optimizations["strtod"] = &StrTo;
Optimizations["strtof"] = &StrTo;
@@ -1281,6 +1422,8 @@
Optimizations["strtoll"] = &StrTo;
Optimizations["strtold"] = &StrTo;
Optimizations["strtoull"] = &StrTo;
+ Optimizations["strspn"] = &StrSpn;
+ Optimizations["strcspn"] = &StrCSpn;
Optimizations["strstr"] = &StrStr;
Optimizations["memcmp"] = &MemCmp;
Optimizations["memcpy"] = &MemCpy;
@@ -2156,7 +2299,7 @@
// * pow(pow(x,y),z)-> pow(x,y*z)
//
// puts:
-// * puts("") -> putchar("\n")
+// * puts("") -> putchar('\n')
//
// round, roundf, roundl:
// * round(cnst) -> cnst'
@@ -2173,24 +2316,6 @@
// stpcpy:
// * stpcpy(str, "literal") ->
// llvm.memcpy(str,"literal",strlen("literal")+1,1)
-// strrchr:
-// * strrchr(s,c) -> reverse_offset_of_in(c,s)
-// (if c is a constant integer and s is a constant string)
-// * strrchr(s1,0) -> strchr(s1,0)
-//
-// strpbrk:
-// * strpbrk(s,a) -> offset_in_for(s,a)
-// (if s and a are both constant strings)
-// * strpbrk(s,"") -> 0
-// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
-//
-// strspn, strcspn:
-// * strspn(s,a) -> const_int (if both args are constant)
-// * strspn("",a) -> 0
-// * strspn(s,"") -> 0
-// * strcspn(s,a) -> const_int (if both args are constant)
-// * strcspn("",a) -> 0
-// * strcspn(s,"") -> strlen(a)
//
// tan, tanf, tanl:
// * tan(atan(x)) -> x
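
The new StrRChr/StrSpn/StrCSpn folds compute the libcall's result at compile
time whenever the strings are known constants. A stand-alone mirror of two of
those computations (evaluated at run time here, at fold time in the pass;
helper names are illustrative):

#include <iostream>
#include <string>

// Mirror of the StrRChrOpt fold: append the implicit nul (strrchr can find
// it), then search from the end; -1 plays the role of the null return.
long foldStrRChr(std::string S, char C) {
  S += '\0';
  size_t I = S.rfind(C);
  return I == std::string::npos ? -1 : (long)I;
}

// Mirror of the StrSpnOpt fold: strspn is the length of the leading run of
// characters drawn from S2.
size_t foldStrSpn(const std::string &S1, const std::string &S2) {
  size_t I = S1.find_first_not_of(S2);
  return I == std::string::npos ? S1.size() : I;
}

int main() {
  std::cout << foldStrRChr("hello", 'l') << "\n";   // 3
  std::cout << foldStrSpn("aabbcc", "ab") << "\n";  // 4
}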
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/Sink.cpp Tue Oct 26 19:48:03 2010
@@ -35,7 +35,9 @@
public:
static char ID; // Pass identification
- Sinking() : FunctionPass(&ID) {}
+ Sinking() : FunctionPass(ID) {
+ initializeSinkingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -56,7 +58,11 @@
} // end anonymous namespace
char Sinking::ID = 0;
-INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
@@ -151,7 +157,7 @@
if (L->isVolatile()) return false;
Value *Ptr = L->getPointerOperand();
- unsigned Size = AA->getTypeStoreSize(L->getType());
+ uint64_t Size = AA->getTypeStoreSize(L->getType());
for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
E = Stores.end(); I != E; ++I)
if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/TailDuplication.cpp Tue Oct 26 19:48:03 2010
@@ -49,7 +49,9 @@
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(&ID) {}
+ TailDup() : FunctionPass(ID) {
+ initializeTailDupPass(*PassRegistry::getPassRegistry());
+ }
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +61,7 @@
}
char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
// Public interface to the Tail Duplication pass
FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
Modified: llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Scalar/TailRecursionElimination.cpp Tue Oct 26 19:48:03 2010
@@ -72,7 +72,9 @@
namespace {
struct TailCallElim : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- TailCallElim() : FunctionPass(&ID) {}
+ TailCallElim() : FunctionPass(ID) {
+ initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -88,7 +90,7 @@
char TailCallElim::ID = 0;
INITIALIZE_PASS(TailCallElim, "tailcallelim",
- "Tail Call Elimination", false, false);
+ "Tail Call Elimination", false, false)
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
@@ -278,22 +280,22 @@
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = 0;
- for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
- if (RI != IgnoreRI) {
- Value *RetOp = RI->getOperand(0);
-
- // We can only perform this transformation if the value returned is
- // evaluatable at the start of the initial invocation of the function,
- // instead of at the end of the evaluation.
- //
- if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
-
- if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
- ReturnedValue = RetOp;
- }
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
return ReturnedValue;
}
@@ -307,7 +309,7 @@
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
- // Exactly one operand should be the result of the call instruction...
+ // Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
return 0;
@@ -387,21 +389,22 @@
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
- if (!CanMoveAboveCall(BBI, CI)) {
- // If we can't move the instruction above the call, it might be because it
- // is an associative and commutative operation that could be tranformed
- // using accumulator recursion elimination. Check to see if this is the
- // case, and if so, remember the initial accumulator value for later.
- if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(BBI, CI))) {
- // Yes, this is accumulator recursion. Remember which instruction
- // accumulates.
- AccumulatorRecursionInstr = BBI;
- } else {
- return false; // Otherwise, we cannot eliminate the tail recursion!
- }
+ for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+    // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
}
+ }
// We can only transform call/return pairs that either ignore the return value
// of the call and return void, ignore the value of the call and return a
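
CanTransformAccumulatorRecursion is what lets a multiply (or other
associative, commutative operation) sitting between the call and the return
be threaded through a loop as an accumulator. At the source level the
transformation looks like this (a sketch; the pass performs the equivalent
rewrite on IR):

#include <cstdio>

// Before: the multiply happens *after* the recursive call, so as written
// this is not a tail call.
long facRec(long N) {
  if (N <= 1) return 1;
  return N * facRec(N - 1);
}

// After: the post-call operation becomes an accumulator, starting at the
// operation's identity value (1 for multiplication).
long facLoop(long N) {
  long Acc = 1;
  for (; N > 1; --N)
    Acc *= N;
  return Acc;
}

int main() {
  std::printf("%ld %ld\n", facRec(10), facLoop(10));  // 3628800 3628800
}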
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/AddrModeMatcher.cpp Tue Oct 26 19:48:03 2010
@@ -21,6 +21,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -379,27 +380,10 @@
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetLowering &TLI) {
- std::vector<InlineAsm::ConstraintInfo>
- Constraints = IA->ParseConstraints();
-
- unsigned ArgNo = 0; // The argument of the CallInst.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
- // Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
- }
-
+  std::vector<TargetLowering::AsmOperandInfo> TargetConstraints =
+    TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, SDValue());
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/BasicBlockUtils.cpp Tue Oct 26 19:48:03 2010
@@ -97,23 +97,13 @@
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
- pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- // Can't merge the entry block. Don't merge away blocks who have their
- // address taken: this is a bug if the predecessor block is the entry node
- // (because we'd end up taking the address of the entry) and undesirable in
- // any case.
- if (pred_begin(BB) == pred_end(BB) ||
- BB->hasAddressTaken()) return false;
-
- BasicBlock *PredBB = *PI++;
- for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
- if (*PI != PredBB) {
- PredBB = 0; // There are multiple different predecessors...
- break;
- }
+  // Don't merge away blocks that have their address taken.
+ if (BB->hasAddressTaken()) return false;
- // Can't merge if there are multiple predecessors.
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
if (!PredBB) return false;
+
// Don't break self-loops.
if (PredBB == BB) return false;
// Don't break invokes.
@@ -267,7 +257,7 @@
case Instruction::Switch: // Should remove entry
default:
case Instruction::Ret: // Cannot happen, has no successors!
- llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");
+ llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
}
if (NewTI) // If it's a different instruction, replace.
@@ -337,7 +327,7 @@
L->addBasicBlockToLoop(New, LI->getBase());
if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
- // Old dominates New. New node domiantes all other nodes dominated by Old.
+ // Old dominates New. New node dominates all other nodes dominated by Old.
DomTreeNode *OldNode = DT->getNode(Old);
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
@@ -421,7 +411,8 @@
DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
if (DT)
DT->splitBlock(NewBB);
- if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
+ if (DominanceFrontier *DF =
+ P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
DF->splitBlock(NewBB);
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
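
The rewritten MergeBlockIntoPredecessor leans on getUniquePredecessor, which
tolerates the same predecessor appearing on several edges (e.g. two arms of a
switch). A small stand-alone model of that check, with ints standing in for
blocks:

#include <cstdio>
#include <vector>

// A block can be merged into its predecessor only if every incoming edge
// comes from one block and there is at least one edge; -1 means "no unique
// predecessor".
int uniquePred(const std::vector<int> &Preds) {
  if (Preds.empty()) return -1;      // Entry block: nothing to merge into.
  for (int P : Preds)
    if (P != Preds[0]) return -1;    // Multiple distinct predecessors.
  return Preds[0];                   // May repeat, e.g. both switch arms.
}

int main() {
  std::printf("%d\n", uniquePred({3, 3}));  // 3: both edges from block 3.
  std::printf("%d\n", uniquePred({3, 4}));  // -1: cannot merge.
}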
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/BreakCriticalEdges.cpp Tue Oct 26 19:48:03 2010
@@ -36,7 +36,9 @@
namespace {
struct BreakCriticalEdges : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BreakCriticalEdges() : FunctionPass(&ID) {}
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -53,11 +55,11 @@
}
char BreakCriticalEdges::ID = 0;
-static RegisterPass<BreakCriticalEdges>
-X("break-crit-edges", "Break critical edges in CFG");
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false)
// Publically exposed interface to pass...
-const PassInfo *const llvm::BreakCriticalEdgesID = &X;
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
}
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CMakeLists.txt Tue Oct 26 19:48:03 2010
@@ -20,10 +20,9 @@
Mem2Reg.cpp
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
- SSI.cpp
SimplifyCFG.cpp
UnifyFunctionExitNodes.cpp
+ Utils.cpp
ValueMapper.cpp
)
-target_link_libraries (LLVMTransformUtils LLVMSupport)
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneFunction.cpp Tue Oct 26 19:48:03 2010
@@ -23,7 +23,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Support/CFG.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/SmallVector.h"
@@ -69,10 +69,11 @@
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
-// ArgMap values.
+// VMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -126,7 +127,7 @@
BE = NewFunc->end(); BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, VMap);
+ RemapInstruction(II, VMap, ModuleLevelChanges);
}
/// CloneFunction - Return a copy of the specified function, but without
@@ -139,6 +140,7 @@
///
Function *llvm::CloneFunction(const Function *F,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo) {
std::vector<const Type*> ArgTypes;
@@ -167,7 +169,7 @@
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo);
+ CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
return NewF;
}
@@ -180,6 +182,7 @@
Function *NewFunc;
const Function *OldFunc;
ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
@@ -187,12 +190,14 @@
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
+ bool moduleLevelChanges,
SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
- : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ : NewFunc(newFunc), OldFunc(oldFunc),
+ VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+ Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
@@ -211,7 +216,7 @@
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone){
- Value *&BBEntry = VMap[BB];
+ TrackingVH<Value> &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -257,8 +262,10 @@
// If the condition was a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
- if (Cond == 0)
- Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
+ if (Cond == 0) {
+ Value *V = VMap[BI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
// Constant fold to uncond branch!
if (Cond) {
@@ -271,8 +278,10 @@
} else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (Cond == 0) // Or known constant after constant prop in the callee...
- Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
+ if (Cond == 0) { // Or known constant after constant prop in the callee...
+ Value *V = VMap[SI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
if (Cond) { // Constant fold to uncond branch!
BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
@@ -313,7 +322,7 @@
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- VMap)))
+ VMap, ModuleLevelChanges)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -355,6 +364,7 @@
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
@@ -368,8 +378,8 @@
assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
- PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
- NameSuffix, CodeInfo, TD);
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ Returns, NameSuffix, CodeInfo, TD);
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
@@ -388,7 +398,8 @@
SmallVector<const PHINode*, 16> PHIToResolve;
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
- BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
+ Value *V = VMap[BI];
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (NewBB == 0) continue; // Dead block.
// Add the new block to the new function.
@@ -449,7 +460,7 @@
I->setDebugLoc(DebugLoc());
}
}
- RemapInstruction(I, VMap);
+ RemapInstruction(I, VMap, ModuleLevelChanges);
}
}
@@ -468,10 +479,11 @@
OPN = PHIToResolve[phino];
PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+ Value *V = VMap[PN->getIncomingBlock(pred)];
if (BasicBlock *MappedBlock =
- cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
+ cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap);
+ VMap, ModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
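
Callers of the cloning utilities now state explicitly whether the clone crosses a module boundary. A sketch of the updated call for the common intra-module case (cloneInPlace is an illustrative wrapper, not an API in this commit):

#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

static Function *cloneInPlace(Function *F) {
  ValueToValueMapTy VMap;  // filled with old->new mappings by the clone
  // false: the clone stays in F's module, so module-level metadata and
  // globals can be shared via the identity mapping.
  return CloneFunction(F, VMap, /*ModuleLevelChanges=*/false);
}

Cross-module cloners such as CloneModule pass true instead, which forces MDNodes referring to remapped values to be rebuilt rather than shared.
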
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneLoop.cpp Tue Oct 26 19:48:03 2010
@@ -22,12 +22,12 @@
/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
/// dominance info. It is expected that basic block is already cloned.
static void CloneDominatorInfo(BasicBlock *BB,
- ValueMap<const Value *, Value *> &VMap,
+ ValueToValueMapTy &VMap,
DominatorTree *DT,
DominanceFrontier *DF) {
assert (DT && "DominatorTree is not available");
- ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+ ValueToValueMapTy::iterator BI = VMap.find(BB);
assert (BI != VMap.end() && "BasicBlock clone is missing");
BasicBlock *NewBB = cast<BasicBlock>(BI->second);
@@ -42,7 +42,7 @@
// NewBB's dominator is either BB's dominator or BB's dominator's clone.
BasicBlock *NewBBDom = BBDom;
- ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+ ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
if (BBDomI != VMap.end()) {
NewBBDom = cast<BasicBlock>(BBDomI->second);
if (!DT->getNode(NewBBDom))
@@ -59,7 +59,7 @@
for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
I != E; ++I) {
BasicBlock *DB = *I;
- ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
+ ValueToValueMapTy::iterator IDM = VMap.find(DB);
if (IDM != VMap.end())
NewDFSet.insert(cast<BasicBlock>(IDM->second));
else
@@ -73,7 +73,7 @@
/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
/// using old blocks to new blocks mapping.
Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- ValueMap<const Value *, Value *> &VMap, Pass *P) {
+ ValueToValueMapTy &VMap, Pass *P) {
DominatorTree *DT = NULL;
DominanceFrontier *DF = NULL;
@@ -134,7 +134,7 @@
for (unsigned index = 0, num_ops = Insn->getNumOperands();
index != num_ops; ++index) {
Value *Op = Insn->getOperand(index);
- ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+ ValueToValueMapTy::iterator OpItr = VMap.find(Op);
if (OpItr != VMap.end())
Insn->setOperand(index, OpItr->second);
}
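
The spelled-out ValueMap<const Value*, Value*> type is replaced throughout by the ValueToValueMapTy typedef from ValueMapper.h; lookups are unchanged apart from the mapped value now being held in a TrackingVH. Roughly (lookupClone is an illustrative helper):

#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

static Value *lookupClone(ValueToValueMapTy &VMap, const Value *Old) {
  ValueToValueMapTy::iterator I = VMap.find(Old);
  if (I == VMap.end())
    return 0;          // not cloned (or not cloned yet)
  return I->second;    // TrackingVH<Value> converts to Value*
}
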
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CloneModule.cpp Tue Oct 26 19:48:03 2010
@@ -17,7 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/Constant.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
/// CloneModule - Return an exact copy of the specified module. This is not as
@@ -89,7 +89,8 @@
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- VMap)));
+ VMap,
+ true)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -108,7 +109,7 @@
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, I, VMap, Returns);
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
}
F->setLinkage(I->getLinkage());
@@ -120,7 +121,7 @@
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, VMap)));
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
}
// And named metadata....
@@ -129,23 +130,8 @@
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap)));
+ NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
}
- // Update metadata attach with instructions.
- for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end();
- FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs;
- BI->getAllMetadata(MDs);
- for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator
- MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) {
- Value *MappedValue = MapValue(MDI->second, VMap);
- if (MDI->second != MappedValue && MappedValue)
- BI->setMetadata(MDI->first, cast<MDNode>(MappedValue));
- }
- }
return New;
}
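
With metadata remapping folded into MapValue and RemapInstruction, the separate fix-up walk over every instruction's attached metadata above becomes dead weight and is dropped. The essential cross-module call reduces to something like this (mapInitializer is an illustrative helper):

#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Constant.h"

using namespace llvm;

static Constant *mapInitializer(const Constant *OldInit,
                                ValueToValueMapTy &VMap) {
  // true: cloning into a different module, so MDNodes that mention
  // remapped values must be rebuilt instead of shared.
  return cast<Constant>(MapValue(OldInit, VMap, /*ModuleLevelChanges=*/true));
}
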
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/CodeExtractor.cpp Tue Oct 26 19:48:03 2010
@@ -186,8 +186,8 @@
if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
if (DT) {
- // Old dominates New. New node domiantes all other nodes dominated
- //by Old.
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
DomTreeNode *OldNode = DT->getNode(*I);
SmallVector<DomTreeNode*, 8> Children;
for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/InlineFunction.cpp Tue Oct 26 19:48:03 2010
@@ -171,7 +171,7 @@
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
- ValueMap<const Value*, Value*> &VMap,
+ ValueToValueMapTy &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -194,7 +194,7 @@
for (; I != E; ++I) {
const Value *OrigCall = I->first;
- ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
if (VMI == VMap.end() || VMI->second == 0)
continue;
@@ -288,7 +288,7 @@
Function::iterator FirstNewBlock;
{ // Scope to destroy VMap after cloning.
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -366,7 +366,8 @@
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i",
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
&InlinedFunctionInfo, IFI.TD, TheCall);
// Remember the first block that is newly cloned over.
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/InstructionNamer.cpp Tue Oct 26 19:48:03 2010
@@ -23,7 +23,9 @@
namespace {
struct InstNamer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(&ID) {}
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
@@ -48,12 +50,11 @@
};
char InstNamer::ID = 0;
- static RegisterPass<InstNamer> X("instnamer",
- "Assign names to anonymous instructions");
}
-
-const PassInfo *const llvm::InstructionNamerID = &X;
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
+char &llvm::InstructionNamerID = InstNamer::ID;
//===----------------------------------------------------------------------===//
//
// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LCSSA.cpp Tue Oct 26 19:48:03 2010
@@ -47,7 +47,9 @@
namespace {
struct LCSSA : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LCSSA() : LoopPass(&ID) {}
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
// Cached analysis information for the current function.
DominatorTree *DT;
@@ -90,10 +92,13 @@
}
char LCSSA::ID = 0;
-static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
-const PassInfo *const llvm::LCSSAID = &X;
+char &llvm::LCSSAID = LCSSA::ID;
/// BlockDominatesAnExit - Return true if the specified block dominates at least
@@ -206,7 +211,7 @@
DomTreeNode *DomNode = DT->getNode(DomBB);
SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(Inst);
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
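
Passes with analysis dependencies use the three-part macro form so the registry can initialize DominatorTree and LoopInfo before the pass itself. The pattern, sketched with a hypothetical pass (shown as a FunctionPass for brevity, though LCSSA itself is a LoopPass):

#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"

using namespace llvm;

namespace llvm { void initializeMyLoopHelperPass(PassRegistry &); }

namespace {
  struct MyLoopHelper : public FunctionPass {
    static char ID;
    MyLoopHelper() : FunctionPass(ID) {
      initializeMyLoopHelperPass(*PassRegistry::getPassRegistry());
    }
    virtual bool runOnFunction(Function &) { return false; }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<LoopInfo>();
    }
  };
}

char MyLoopHelper::ID = 0;
// The BEGIN/DEPENDENCY/END form records the dependencies so the generated
// initializer can initialize them first.
INITIALIZE_PASS_BEGIN(MyLoopHelper, "my-loop-helper",
                      "My loop helper", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_END(MyLoopHelper, "my-loop-helper",
                    "My loop helper", false, false)
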
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/Local.cpp Tue Oct 26 19:48:03 2010
@@ -490,6 +490,9 @@
/// rewriting all the predecessors to branch to the successor block and return
/// true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
// We can't eliminate infinite loops.
BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
if (BB == Succ) return false;
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopSimplify.cpp Tue Oct 26 19:48:03 2010
@@ -46,9 +46,9 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
@@ -65,13 +65,16 @@
namespace {
struct LoopSimplify : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : LoopPass(&ID) {}
+ LoopSimplify() : LoopPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
// AA - If we have an alias analysis object to update, this is it, otherwise
// this is null.
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
+ ScalarEvolution *SE;
Loop *L;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -106,11 +109,15 @@
}
char LoopSimplify::ID = 0;
-static RegisterPass<LoopSimplify>
-X("loopsimplify", "Canonicalize natural loops", true);
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loopsimplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loopsimplify",
+ "Canonicalize natural loops", true, false)
// Publicly exposed interface to pass...
-const PassInfo *const llvm::LoopSimplifyID = &X;
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
@@ -122,6 +129,7 @@
LI = &getAnalysis<LoopInfo>();
AA = getAnalysisIfAvailable<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
Changed |= ProcessLoop(L, LPM);
@@ -533,6 +541,12 @@
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
OuterLoopPreds.size(),
@@ -622,6 +636,11 @@
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
if (P != Preheader) BackedgeBlocks.push_back(P);
}
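
ScalarEvolution caches trip counts and SCEV expressions per loop, so a pass that restructures a loop's CFG should invalidate those caches first whenever the analysis happens to be alive. A minimal sketch of the pattern (invalidateLoopSCEV is illustrative):

#include "llvm/Pass.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

static void invalidateLoopSCEV(Pass &P, Loop *L) {
  // getAnalysisIfAvailable: ScalarEvolution is optional here, not required.
  if (ScalarEvolution *SE = P.getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);  // drop cached backedge-taken counts and SCEVs for L
}
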
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LoopUnroll.cpp Tue Oct 26 19:48:03 2010
@@ -40,10 +40,10 @@
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
static inline void RemapInstruction(Instruction *I,
- ValueMap<const Value *, Value*> &VMap) {
+ ValueToValueMapTy &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
if (It != VMap.end())
I->setOperand(op, It->second);
}
@@ -189,7 +189,6 @@
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
- typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -274,7 +273,7 @@
for (unsigned i = 0; i < NewBlocks.size(); ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, LastValueMap);
+ ::RemapInstruction(I, LastValueMap);
}
// The latch block exits the loop. If there are any PHI nodes in the
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LowerInvoke.cpp Tue Oct 26 19:48:03 2010
@@ -78,14 +78,16 @@
static char ID; // Pass identification, replacement for typeid
explicit LowerInvoke(const TargetLowering *tli = NULL,
bool useExpensiveEHSupport = ExpensiveEHSupport)
- : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport),
- TLI(tli) { }
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ TLI(tli) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// This is a cluster of orthogonal Transforms
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
@@ -100,10 +102,11 @@
}
char LowerInvoke::ID = 0;
-static RegisterPass<LowerInvoke>
-X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false)
-const PassInfo *const llvm::LowerInvokePassID = &X;
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
@@ -147,19 +150,20 @@
"llvm.sjljeh.jblist");
}
-// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
-// so it looks like Intrinsic::_setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)
-// let's return it to _setjmp state in case anyone ever needs it after this
-// point under VisualStudio
-#define setjmp _setjmp
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
#endif
LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
@@ -185,6 +189,7 @@
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
II->replaceAllUsesWith(NewCall);
// Insert an unconditional branch to the normal destination.
@@ -265,6 +270,7 @@
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
II->replaceAllUsesWith(NewCall);
// Replace the invoke with an uncond branch.
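
The old #undef/#define dance guessed that setjmp had been defined as _setjmp; push_macro/pop_macro restore whatever the definition actually was. The shape of the idiom, independent of this file (the guard name here is illustrative):

// MSVC's <setjmp.h> defines setjmp as a macro, which would break uses of
// the bare identifier (here, Intrinsic::setjmp). Save, clear, restore.
#if defined(_MSC_VER) && defined(setjmp) && !defined(SAVED_SETJMP_MACRO)
# pragma push_macro("setjmp")
# undef setjmp
# define SAVED_SETJMP_MACRO
#endif

/* ... code that needs the plain identifier setjmp ... */

#if defined(_MSC_VER) && defined(SAVED_SETJMP_MACRO)
# pragma pop_macro("setjmp")  // reinstates the saved definition exactly
# undef SAVED_SETJMP_MACRO
#endif
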
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/LowerSwitch.cpp Tue Oct 26 19:48:03 2010
@@ -29,19 +29,20 @@
namespace {
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
- /// instructions. Note that this cannot be a BasicBlock pass because it
- /// modifies the CFG!
+ /// instructions.
class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LowerSwitch() : FunctionPass(&ID) {}
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerInvokePassID);
}
@@ -50,8 +51,7 @@
Constant* High;
BasicBlock* BB;
- CaseRange() : Low(0), High(0), BB(0) { }
- CaseRange(Constant* low, Constant* high, BasicBlock* bb) :
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
Low(low), High(high), BB(bb) { }
};
@@ -81,11 +81,11 @@
}
char LowerSwitch::ID = 0;
-static RegisterPass<LowerSwitch>
-X("lowerswitch", "Lower SwitchInst's to branches");
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
// Publicly exposed interface to pass...
-const PassInfo *const llvm::LowerSwitchID = &X;
+char &llvm::LowerSwitchID = LowerSwitch::ID;
// createLowerSwitchPass - Interface to this file...
FunctionPass *llvm::createLowerSwitchPass() {
return new LowerSwitch();
@@ -109,7 +109,8 @@
// operator<< - Used for debugging purposes.
//
static raw_ostream& operator<<(raw_ostream &O,
- const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+ const LowerSwitch::CaseVector &C)
+ LLVM_ATTRIBUTE_USED;
static raw_ostream& operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C) {
O << "[";
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/Mem2Reg.cpp Tue Oct 26 19:48:03 2010
@@ -27,7 +27,9 @@
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- PromotePass() : FunctionPass(&ID) {}
+ PromotePass() : FunctionPass(ID) {
+ initializePromotePassPass(*PassRegistry::getPassRegistry());
+ }
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
@@ -49,7 +51,12 @@
} // end of anonymous namespace
char PromotePass::ID = 0;
-static RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register");
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
bool PromotePass::runOnFunction(Function &F) {
std::vector<AllocaInst*> Allocas;
@@ -81,8 +88,6 @@
return Changed;
}
-// Publically exposed interface to pass...
-const PassInfo *const llvm::PromoteMemoryToRegisterID = &X;
// createPromoteMemoryToRegister - Provide an entry point to create this pass.
//
FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Tue Oct 26 19:48:03 2010
@@ -228,14 +228,6 @@
void run();
- /// properlyDominates - Return true if I1 properly dominates I2.
- ///
- bool properlyDominates(Instruction *I1, Instruction *I2) const {
- if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
- I1 = II->getNormalDest()->begin();
- return DT.properlyDominates(I1->getParent(), I2->getParent());
- }
-
/// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
///
bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/SSAUpdater.cpp Tue Oct 26 19:48:03 2010
@@ -29,20 +29,21 @@
}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {}
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete &getAvailableVals(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates. ProtoValue is the value used to name PHI nodes.
-void SSAUpdater::Initialize(Value *ProtoValue) {
+/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
if (AV == 0)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
- PrototypeValue = ProtoValue;
+ ProtoType = Ty;
+ ProtoName = Name;
}
/// HasValueForBlock - Return true if the SSAUpdater already has a value for
@@ -54,8 +55,8 @@
/// AddAvailableValue - Indicate that a rewritten value is available in the
/// specified block with the specified value.
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
- assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
- assert(PrototypeValue->getType() == V->getType() &&
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
"All rewritten values must have the same type");
getAvailableVals(AV)[BB] = V;
}
@@ -148,7 +149,7 @@
// If there are no predecessors, just return undef.
if (PredValues.empty())
- return UndefValue::get(PrototypeValue->getType());
+ return UndefValue::get(ProtoType);
// Otherwise, if all the merged values are the same, just use it.
if (SingularValue != 0)
@@ -168,9 +169,7 @@
}
// Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
- PrototypeValue->getName(),
- &BB->front());
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front());
InsertedPHI->reserveOperandSpace(PredValues.size());
// Fill in all the predecessors of the PHI.
@@ -205,6 +204,22 @@
U.set(V);
}
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
+
/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
/// in the SSAUpdaterImpl template.
namespace {
@@ -266,15 +281,14 @@
/// GetUndefVal - Get an undefined value of the same type as the value
/// being handled.
static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
- return UndefValue::get(Updater->PrototypeValue->getType());
+ return UndefValue::get(Updater->ProtoType);
}
/// CreateEmptyPHI - Create a new PHI instruction in the specified block.
/// Reserve space for the operands but do not fill them in yet.
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
SSAUpdater *Updater) {
- PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(),
- Updater->PrototypeValue->getName(),
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName,
&BB->front());
PHI->reserveOperandSpace(NumPreds);
return PHI;
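
With the prototype value replaced by an explicit type and name hint, a typical client now initializes the updater from the instruction being rewritten and feeds it one definition per block. A sketch (rewriteUses is illustrative):

#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

static void rewriteUses(Instruction *Inst, BasicBlock *DefBB, Value *NewDef) {
  SSAUpdater SSA;
  // Old: SSA.Initialize(Inst);  New: pass the type and a name hint.
  SSA.Initialize(Inst->getType(), Inst->getName());
  SSA.AddAvailableValue(DefBB, NewDef);
  while (!Inst->use_empty()) {
    Use &U = Inst->use_begin().getUse();
    SSA.RewriteUse(U);  // inserts PHIs of Inst's type where needed
  }
}

The new RewriteUseAfterInsertions entry point is the variant to call when uses may sit in the same block as, but below, the inserted definitions.
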
Removed: llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/SSI.cpp (removed)
@@ -1,433 +0,0 @@
-//===------------------- SSI.cpp - Creates SSI Representation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts a list of variables to the Static Single Information
-// form. This is a program representation described by Scott Ananian in his
-// Master Thesis: "The Static Single Information Form (1999)".
-// We are building an on-demand representation, that is, we do not convert
-// every single variable in the target function to SSI form. Rather, we receive
-// a list of target variables that must be converted. We also do not
-// completely convert a target variable to the SSI format. Instead, we only
-// change the variable in the points where new information can be attached
-// to its live range, that is, at branch points.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ssi"
-
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-
-using namespace llvm;
-
-static const std::string SSI_PHI = "SSI_phi";
-static const std::string SSI_SIG = "SSI_sigma";
-
-STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
-STATISTIC(NumPhiInserted, "Number of phi functions inserted");
-
-void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<DominanceFrontier>();
- AU.addRequiredTransitive<DominatorTree>();
- AU.setPreservesAll();
-}
-
-bool SSI::runOnFunction(Function &F) {
- DT_ = &getAnalysis<DominatorTree>();
- return false;
-}
-
-/// This methods creates the SSI representation for the list of values
-/// received. It will only create SSI representation if a value is used
-/// to decide a branch. Repeated values are created only once.
-///
-void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
- init(value);
-
- SmallPtrSet<Instruction*, 4> needConstruction;
- for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
- E = value.end(); I != E; ++I)
- if (created.insert(*I))
- needConstruction.insert(*I);
-
- insertSigmaFunctions(needConstruction);
-
- // Test if there is a need to transform to SSI
- if (!needConstruction.empty()) {
- insertPhiFunctions(needConstruction);
- renameInit(needConstruction);
- rename(DT_->getRoot());
- fixPhis();
- }
-
- clean();
-}
-
-/// Insert sigma functions (a sigma function is a phi function with one
-/// operator)
-///
-void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- for (Value::use_iterator begin = (*I)->use_begin(),
- end = (*I)->use_end(); begin != end; ++begin) {
- // Test if the Use of the Value is in a comparator
- if (CmpInst *CI = dyn_cast<CmpInst>(*begin)) {
- // Iterates through all uses of CmpInst
- for (Value::use_iterator begin_ci = CI->use_begin(),
- end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) {
- // Test if any use of CmpInst is in a Terminator
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(*begin_ci)) {
- insertSigma(TI, *I);
- }
- }
- }
- }
- }
-}
-
-/// Inserts Sigma Functions in every BasicBlock successor to Terminator
-/// Instruction TI. All inserted Sigma Function are related to Instruction I.
-///
-void SSI::insertSigma(TerminatorInst *TI, Instruction *I) {
- // Basic Block of the Terminator Instruction
- BasicBlock *BB = TI->getParent();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
- // Next Basic Block
- BasicBlock *BB_next = TI->getSuccessor(i);
- if (BB_next != BB &&
- BB_next->getSinglePredecessor() != NULL &&
- dominateAny(BB_next, I)) {
- PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin());
- PN->addIncoming(I, BB);
- sigmas[PN] = I;
- created.insert(PN);
- defsites[I].push_back(BB_next);
- ++NumSigmaInserted;
- }
- }
-}
-
-/// Insert phi functions when necessary
-///
-void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {
- DominanceFrontier *DF = &getAnalysis<DominanceFrontier>();
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- // Test if there were any sigmas for this variable
- SmallPtrSet<BasicBlock *, 16> BB_visited;
-
- // Insert phi functions if there is any sigma function
- while (!defsites[*I].empty()) {
-
- BasicBlock *BB = defsites[*I].back();
-
- defsites[*I].pop_back();
- DominanceFrontier::iterator DF_BB = DF->find(BB);
-
- // The BB is unreachable. Skip it.
- if (DF_BB == DF->end())
- continue;
-
- // Iterates through all the dominance frontier of BB
- for (std::set<BasicBlock *>::iterator DF_BB_begin =
- DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
- DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
- BasicBlock *BB_dominated = *DF_BB_begin;
-
- // Test if has not yet visited this node and if the
- // original definition dominates this node
- if (BB_visited.insert(BB_dominated) &&
- DT_->properlyDominates(value_original[*I], BB_dominated) &&
- dominateAny(BB_dominated, *I)) {
- PHINode *PN = PHINode::Create(
- (*I)->getType(), SSI_PHI, BB_dominated->begin());
- phis.insert(std::make_pair(PN, *I));
- created.insert(PN);
-
- defsites[*I].push_back(BB_dominated);
- ++NumPhiInserted;
- }
- }
- }
- BB_visited.clear();
- }
-}
-
-/// Some initialization for the rename part
-///
-void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I)
- value_stack[*I].push_back(*I);
-}
-
-/// Renames all variables in the specified BasicBlock.
-/// Only variables that need to be rename will be.
-///
-void SSI::rename(BasicBlock *BB) {
- SmallPtrSet<Instruction*, 8> defined;
-
- // Iterate through instructions and make appropriate renaming.
- // For SSI_PHI (b = PHI()), store b at value_stack as a new
- // definition of the variable it represents.
- // For SSI_SIG (b = PHI(a)), substitute a with the current
- // value of a, present in the value_stack.
- // Then store bin the value_stack as the new definition of a.
- // For all other instructions (b = OP(a, c, d, ...)), we need to substitute
- // all operands with its current value, present in value_stack.
- for (BasicBlock::iterator begin = BB->begin(), end = BB->end();
- begin != end; ++begin) {
- Instruction *I = begin;
- if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
- Instruction* position;
-
- // Treat SSI_PHI
- if ((position = getPositionPhi(PN))) {
- value_stack[position].push_back(PN);
- defined.insert(position);
- // Treat SSI_SIG
- } else if ((position = getPositionSigma(PN))) {
- substituteUse(I);
- value_stack[position].push_back(PN);
- defined.insert(position);
- }
-
- // Treat all other PHI functions
- else {
- substituteUse(I);
- }
- }
-
- // Treat all other functions
- else {
- substituteUse(I);
- }
- }
-
- // This loop iterates in all BasicBlocks that are successors of the current
- // BasicBlock. For each SSI_PHI instruction found, insert an operand.
- // This operand is the current operand in value_stack for the variable
- // in "position". And the BasicBlock this operand represents is the current
- // BasicBlock.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
- BasicBlock *BB_succ = *SI;
-
- for (BasicBlock::iterator begin = BB_succ->begin(),
- notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
- Instruction *I = begin;
- PHINode *PN = dyn_cast<PHINode>(I);
- Instruction* position;
- if (PN && ((position = getPositionPhi(PN)))) {
- PN->addIncoming(value_stack[position].back(), BB);
- }
- }
- }
-
- // This loop calls rename on all children from this block. This time children
- // refers to a successor block in the dominance tree.
- DomTreeNode *DTN = DT_->getNode(BB);
- for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end();
- begin != end; ++begin) {
- DomTreeNodeBase<BasicBlock> *DTN_children = *begin;
- BasicBlock *BB_children = DTN_children->getBlock();
- rename(BB_children);
- }
-
- // Now we remove all inserted definitions of a variable from the top of
- // the stack leaving the previous one as the top.
- for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
- DE = defined.end(); DI != DE; ++DI)
- value_stack[*DI].pop_back();
-}
-
-/// Substitute any use in this instruction for the last definition of
-/// the variable
-///
-void SSI::substituteUse(Instruction *I) {
- for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
- Value *operand = I->getOperand(i);
- for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator
- VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) {
- if (operand == VI->second.front() &&
- I != VI->second.back()) {
- PHINode *PN_I = dyn_cast<PHINode>(I);
- PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());
-
- // If a phi created in a BasicBlock is used as an operand of another
- // created in the same BasicBlock, this step marks this second phi,
- // to fix this issue later. It cannot be fixed now, because the
- // operands of the first phi are not final yet.
- if (PN_I && PN_vs &&
- VI->second.back()->getParent() == I->getParent()) {
-
- phisToFix.insert(PN_I);
- }
-
- I->setOperand(i, VI->second.back());
- break;
- }
- }
- }
-}
-
-/// Test if the BasicBlock BB dominates any use or definition of value.
-/// If it dominates a phi instruction that is on the same BasicBlock,
-/// that does not count.
-///
-bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
- for (Value::use_iterator begin = value->use_begin(),
- end = value->use_end(); begin != end; ++begin) {
- Instruction *I = cast<Instruction>(*begin);
- BasicBlock *BB_father = I->getParent();
- if (BB == BB_father && isa<PHINode>(I))
- continue;
- if (DT_->dominates(BB, BB_father)) {
- return true;
- }
- }
- return false;
-}
-
-/// When there is a phi node that is created in a BasicBlock and it is used
-/// as an operand of another phi function used in the same BasicBlock,
-/// LLVM looks this as an error. So on the second phi, the first phi is called
-/// P and the BasicBlock it incomes is B. This P will be replaced by the value
-/// it has for BasicBlock B. It also includes undef values for predecessors
-/// that were not included in the phi.
-///
-void SSI::fixPhis() {
- for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),
- end = phisToFix.end(); begin != end; ++begin) {
- PHINode *PN = *begin;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
- PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i));
- if (PN_father && PN->getParent() == PN_father->getParent() &&
- !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {
- BasicBlock *BB = PN->getIncomingBlock(i);
- int pos = PN_father->getBasicBlockIndex(BB);
- PN->setIncomingValue(i, PN_father->getIncomingValue(pos));
- }
- }
- }
-
- for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(),
- end = phis.end(); begin != end; ++begin) {
- PHINode *PN = begin->first;
- BasicBlock *BB = PN->getParent();
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- SmallVector<BasicBlock*, 8> Preds(PI, PE);
- for (unsigned size = Preds.size();
- PI != PE && PN->getNumIncomingValues() != size; ++PI) {
- bool found = false;
- for (unsigned i = 0, pn_end = PN->getNumIncomingValues();
- i < pn_end; ++i) {
- if (PN->getIncomingBlock(i) == *PI) {
- found = true;
- break;
- }
- }
- if (!found) {
- PN->addIncoming(UndefValue::get(PN->getType()), *PI);
- }
- }
- }
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the phis list.
-///
-Instruction* SSI::getPositionPhi(PHINode *PN) {
- DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);
- if (val == phis.end())
- return 0;
- else
- return val->second;
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the sigmas list.
-///
-Instruction* SSI::getPositionSigma(PHINode *PN) {
- DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);
- if (val == sigmas.end())
- return 0;
- else
- return val->second;
-}
-
-/// Initializes
-///
-void SSI::init(SmallVectorImpl<Instruction *> &value) {
- for (SmallVectorImpl<Instruction *>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- value_original[*I] = (*I)->getParent();
- defsites[*I].push_back((*I)->getParent());
- }
-}
-
-/// Clean all used resources in this creation of SSI
-///
-void SSI::clean() {
- phis.clear();
- sigmas.clear();
- phisToFix.clear();
-
- defsites.clear();
- value_stack.clear();
- value_original.clear();
-}
-
-/// createSSIPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIPass() { return new SSI(); }
-
-char SSI::ID = 0;
-INITIALIZE_PASS(SSI, "ssi",
- "Static Single Information Construction", false, false);
-
-/// SSIEverything - A pass that runs createSSI on every non-void variable,
-/// intended for debugging.
-namespace {
- struct SSIEverything : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- SSIEverything() : FunctionPass(&ID) {}
-
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SSI>();
- }
- };
-}
-
-bool SSIEverything::runOnFunction(Function &F) {
- SmallVector<Instruction *, 16> Insts;
- SSI &ssi = getAnalysis<SSI>();
-
- if (F.isDeclaration() || F.isIntrinsic()) return false;
-
- for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
- for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
- if (!I->getType()->isVoidTy())
- Insts.push_back(I);
-
- ssi.createSSI(Insts);
- return true;
-}
-
-/// createSSIEverythingPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); }
-
-char SSIEverything::ID = 0;
-INITIALIZE_PASS(SSIEverything, "ssi-everything",
- "Static Single Information Construction", false, false);
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/SimplifyCFG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/SimplifyCFG.cpp Tue Oct 26 19:48:03 2010
@@ -1720,16 +1720,15 @@
bool SimplifyCFGOpt::run(BasicBlock *BB) {
bool Changed = false;
- Function *M = BB->getParent();
+ Function *Fn = BB->getParent();
- assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB && Fn && "Block not embedded in function!");
assert(BB->getTerminator() && "Degenerate basic block encountered!");
- assert(&BB->getParent()->getEntryBlock() != BB &&
- "Can't Simplify entry block!");
- // Remove basic blocks that have no predecessors... or that just have themself
- // as a predecessor. These are unreachable.
- if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themself as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) && BB != &Fn->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
DEBUG(dbgs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
return true;
@@ -1798,7 +1797,7 @@
// If we eliminated all predecessors of the block, delete the block now.
if (pred_begin(BB) == pred_end(BB))
// We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
+ Fn->getBasicBlockList().erase(BB);
return true;
}
@@ -1847,10 +1846,10 @@
Preds.pop_back();
}
- // If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB)) {
+ // If this block is now dead (and isn't the entry block), remove it.
+ if (pred_begin(BB) == pred_end(BB) && BB != &Fn->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
+ Fn->getBasicBlockList().erase(BB);
return true;
}
@@ -1880,8 +1879,9 @@
while (isa<DbgInfoIntrinsic>(BBI))
++BBI;
if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
- return true;
+ if (BB != &Fn->getEntryBlock())
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
} else { // Conditional branch
if (isValueEqualityComparison(BI)) {
@@ -2049,12 +2049,37 @@
}
// If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB)) {
+ if (pred_begin(BB) == pred_end(BB) && BB != &Fn->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
+ Fn->getBasicBlockList().erase(BB);
return true;
}
}
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ } else if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ }
}
// Merge basic blocks into their predecessor if there is only one distinct
@@ -2068,12 +2093,15 @@
// is a conditional branch, see if we can hoist any code from this block up
// into our predecessor.
pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- BasicBlock *OnlyPred = *PI++;
- for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
- if (*PI != OnlyPred) {
+ BasicBlock *OnlyPred = 0;
+ for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
+ if (!OnlyPred)
+ OnlyPred = *PI;
+ else if (*PI != OnlyPred) {
OnlyPred = 0; // There are multiple different predecessors...
break;
}
+ }
if (OnlyPred)
if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
@@ -2172,8 +2200,6 @@
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
-/// WARNING: The entry node of a function may not be simplified.
-///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
return SimplifyCFGOpt(TD).run(BB);
}
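
Since SimplifyCFG now guards the entry block itself, a driver can hand it every block without a special case. Roughly, the canonical loop becomes (simplifyOnce is an illustrative wrapper):

#include "llvm/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

static bool simplifyOnce(Function &F, const TargetData *TD) {
  bool Changed = false;
  // Advance the iterator before the call: SimplifyCFG may erase *BB.
  for (Function::iterator BB = F.begin(); BB != F.end(); )
    Changed |= SimplifyCFG(BB++, TD);
  return Changed;
}
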
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp Tue Oct 26 19:48:03 2010
@@ -25,7 +25,7 @@
char UnifyFunctionExitNodes::ID = 0;
INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
- "Unify function exit nodes", false, false);
+ "Unify function exit nodes", false, false)
Pass *llvm::createUnifyFunctionExitNodesPass() {
return new UnifyFunctionExitNodes();
@@ -35,7 +35,7 @@
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
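
Preserving mem2reg by its command-line name rather than through PromoteMemoryToRegisterID removes the link-time dependency on that pass's global, which matters now that Mem2Reg.cpp no longer exports the ID (see the removal above). In a pass of this cluster the usage section reads roughly as follows (ExampleLowering is hypothetical):

#include "llvm/Pass.h"

using namespace llvm;

namespace {
  struct ExampleLowering : public FunctionPass {
    static char ID;
    ExampleLowering() : FunctionPass(ID) {}
    virtual bool runOnFunction(Function &) { return false; }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      // Look the preserved pass up by name at runtime; no reference to
      // PromoteMemoryToRegisterID, so no link dependency on Mem2Reg.cpp.
      AU.addPreserved("mem2reg");
    }
  };
}

char ExampleLowering::ID = 0;
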
Modified: llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.cpp Tue Oct 26 19:48:03 2010
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -20,28 +20,51 @@
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
- Value *&VMSlot = VM[V];
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
+ bool ModuleLevelChanges) {
+ TrackingVH<Value> &VMSlot = VM[V];
if (VMSlot) return VMSlot; // Does it exist in the map yet?
// NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
// DenseMap. This includes any recursive calls to MapValue.
- // Global values and non-function-local metadata do not need to be seeded into
- // the VM if they are using the identity mapping.
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
- (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
+ (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
+ !ModuleLevelChanges))
return VMSlot = const_cast<Value*>(V);
if (const MDNode *MD = dyn_cast<MDNode>(V)) {
- SmallVector<Value*, 4> Elts;
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
- Elts.push_back(MD->getOperand(i) ? MapValue(MD->getOperand(i), VM) : 0);
- return VM[V] = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ // Start by assuming that we'll use the identity mapping.
+ VMSlot = const_cast<Value*>(V);
+
+ // Check all operands to see if any need to be remapped.
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+ Value *OP = MD->getOperand(i);
+ if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+
+ // Ok, at least one operand needs remapping.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+ VM[V] = Dummy;
+ SmallVector<Value*, 4> Elts;
+ Elts.reserve(MD->getNumOperands());
+ for (i = 0; i != e; ++i)
+ Elts.push_back(MD->getOperand(i) ?
+ MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ Dummy->replaceAllUsesWith(NewMD);
+ MDNode::deleteTemporary(Dummy);
+ return VM[V] = NewMD;
+ }
+
+ // No operands needed remapping; keep the identity map.
+ return const_cast<Value*>(V);
}
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
@@ -51,7 +74,7 @@
if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This array must contain a reference to a global, make a new array
// and return it.
@@ -62,7 +85,8 @@
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantArray::get(CA->getType(), Values);
}
}
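The array hunk above and the struct and vector hunks below share one copy-on-write scan: operands are probed in place, and only when one actually changes is a fresh constant allocated, the untouched prefix back-filled, and the remainder remapped. Concretely, for the array case (a sketch assuming ValueMapper.cpp's existing includes; remapArray is a hypothetical helper, not part of the patch):

  static Constant *remapArray(ConstantArray *CA, ValueToValueMapTy &VM,
                              bool ModuleLevelChanges) {
    for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
         i != e; ++i) {
      Value *MV = MapValue(*i, VM, ModuleLevelChanges);
      if (MV == *i) continue;           // identity so far; no copy needed yet

      // First changed operand: copy the unchanged prefix, remap the rest.
      std::vector<Constant*> Values;
      Values.reserve(CA->getNumOperands());
      for (User::op_iterator j = b; j != i; ++j)
        Values.push_back(cast<Constant>(*j));
      Values.push_back(cast<Constant>(MV));
      for (++i; i != e; ++i)
        Values.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
      return ConstantArray::get(CA->getType(), Values);
    }
    return CA;                          // nothing changed: reuse the original
  }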
@@ -72,7 +96,7 @@
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This struct must contain a reference to a global, make a new struct
// and return it.
@@ -83,7 +107,8 @@
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantStruct::get(CS->getType(), Values);
}
}
@@ -93,14 +118,14 @@
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
std::vector<Constant*> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+ Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
return VM[V] = CE->getWithOperands(Ops);
}
if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This vector value must contain a reference to a global, make a new
// vector constant and return it.
@@ -111,7 +136,8 @@
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantVector::get(Values);
}
}
@@ -119,19 +145,33 @@
}
BlockAddress *BA = cast<BlockAddress>(C);
- Function *F = cast<Function>(MapValue(BA->getFunction(), VM));
- BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM));
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
+ ModuleLevelChanges));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
+ ModuleLevelChanges));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges) {
+ // Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap);
+ Value *V = MapValue(*op, VMap, ModuleLevelChanges);
assert(V && "Referenced value not in value map!");
*op = V;
}
-}
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ Value *Old = MI->second;
+ Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+ if (New != Old)
+ I->setMetadata(MI->first, cast<MDNode>(New));
+ }
+}
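With the flag threaded through both entry points, a cloning client calls them exactly as before, passing ModuleLevelChanges=true when the clone ends up in a different module and module-level metadata must therefore be rewritten too. A hedged usage sketch (remapClonedBody and NewF are illustrative; VMap is assumed to be pre-seeded with argument and block correspondences while cloning):

  #include "llvm/Function.h"
  #include "llvm/Transforms/Utils/ValueMapper.h"
  using namespace llvm;

  static void remapClonedBody(Function *NewF, ValueToValueMapTy &VMap) {
    for (Function::iterator BB = NewF->begin(), E = NewF->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I)
        RemapInstruction(&*I, VMap, /*ModuleLevelChanges=*/true);
  }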
Removed: llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h?rev=117424&view=auto
==============================================================================
--- llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h (original)
+++ llvm/branches/wendling/eh/lib/Transforms/Utils/ValueMapper.h (removed)
@@ -1,29 +0,0 @@
-//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the MapValue interface which is used by various parts of
-// the Transforms/Utils library to implement cloning and linking facilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef VALUEMAPPER_H
-#define VALUEMAPPER_H
-
-#include "llvm/ADT/ValueMap.h"
-
-namespace llvm {
- class Value;
- class Instruction;
- typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
-
- Value *MapValue(const Value *V, ValueToValueMapTy &VM);
- void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
-} // End llvm namespace
-
-#endif
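The header is not gone, only relocated: ValueMapper.cpp now includes llvm/Transforms/Utils/ValueMapper.h from the public include tree, so out-of-tree clients can reach the interface. Judging from the call sites above, the relocated header should declare roughly the following (a sketch inferred from this patch, not the verbatim new file):

  #include "llvm/ADT/ValueMap.h"
  #include "llvm/Support/ValueHandle.h"  // TrackingVH

  namespace llvm {
    class Value;
    class Instruction;
    typedef ValueMap<const Value *, TrackingVH<Value> > ValueToValueMapTy;

    Value *MapValue(const Value *V, ValueToValueMapTy &VM,
                    bool ModuleLevelChanges);
    void RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
                          bool ModuleLevelChanges);
  }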
Modified: llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp (original)
+++ llvm/branches/wendling/eh/lib/VMCore/AsmWriter.cpp Tue Oct 26 19:48:03 2010
@@ -16,7 +16,7 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#include "llvm/LLVMContext.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
@@ -198,6 +198,7 @@
case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
case Type::LabelTyID: OS << "label"; break;
case Type::MetadataTyID: OS << "metadata"; break;
+ case Type::X86_MMXTyID: OS << "x86_mmx"; break;
case Type::IntegerTyID:
OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
break;
@@ -228,7 +229,7 @@
E = STy->element_end(); I != E; ++I) {
OS << ' ';
CalcTypeName(*I, TypeStack, OS);
- if (next(I) == STy->element_end())
+ if (llvm::next(I) == STy->element_end())
OS << ' ';
else
OS << ',';
@@ -238,21 +239,6 @@
OS << '>';
break;
}
- case Type::UnionTyID: {
- const UnionType *UTy = cast<UnionType>(Ty);
- OS << "union {";
- for (StructType::element_iterator I = UTy->element_begin(),
- E = UTy->element_end(); I != E; ++I) {
- OS << ' ';
- CalcTypeName(*I, TypeStack, OS);
- if (next(I) == UTy->element_end())
- OS << ' ';
- else
- OS << ',';
- }
- OS << '}';
- break;
- }
case Type::PointerTyID: {
const PointerType *PTy = cast<PointerType>(Ty);
CalcTypeName(PTy->getElementType(), TypeStack, OS);
@@ -1042,16 +1028,6 @@
return;
}
- if (const ConstantUnion *CU = dyn_cast<ConstantUnion>(CV)) {
- Out << "{ ";
- TypePrinter.print(CU->getOperand(0)->getType(), Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CU->getOperand(0), &TypePrinter, Machine,
- Context);
- Out << " }";
- return;
- }
-
if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
const Type *ETy = CP->getType()->getElementType();
assert(CP->getNumOperands() > 0 &&
@@ -1082,11 +1058,6 @@
return;
}
- if (const MDNode *Node = dyn_cast<MDNode>(CV)) {
- Out << "!" << Machine->getMetadataSlot(Node);
- return;
- }
-
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
Out << CE->getOpcodeName();
WriteOptimizationInfo(Out, CE);
@@ -1190,7 +1161,11 @@
else
Machine = new SlotTracker(Context);
}
- Out << '!' << Machine->getMetadataSlot(N);
+ int Slot = Machine->getMetadataSlot(N);
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
return;
}
@@ -1420,7 +1395,11 @@
Out << "!" << NMD->getName() << " = !{";
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
if (i) Out << ", ";
- Out << '!' << Machine.getMetadataSlot(NMD->getOperand(i));
+ int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
}
Out << "}\n";
}
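Both <badref> hunks guard the same failure mode: getMetadataSlot returns -1 for a node that has no slot assigned (a forward reference, or a node missed during slot collection), and printing that raw would emit a nonsensical "!-1". The shared pattern, factored out for clarity (a sketch; writeMDRef is hypothetical, and SlotTracker and raw_ostream come from AsmWriter.cpp's existing context):

  static void writeMDRef(raw_ostream &Out, SlotTracker &Machine,
                         const MDNode *N) {
    int Slot = Machine.getMetadataSlot(N);
    if (Slot == -1)
      Out << "<badref>";   // no slot yet: don't print a bogus "!-1"
    else
      Out << '!' << Slot;
  }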
@@ -1435,6 +1414,9 @@
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "linker_private_weak ";
break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "linker_private_weak_def_auto ";
+ break;
case GlobalValue::InternalLinkage: Out << "internal "; break;
case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
@@ -1597,6 +1579,8 @@
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << "ptx_device"; break;
default: Out << "cc" << F->getCallingConv() << " "; break;
}
@@ -1657,11 +1641,10 @@
if (F->hasGC())
Out << " gc \"" << F->getGC() << '"';
if (F->isDeclaration()) {
- Out << "\n";
+ Out << '\n';
} else {
Out << " {";
-
- // Output all of its basic blocks... for the function
+ // Output all of the function's basic blocks.
for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
printBasicBlock(I);
@@ -1710,7 +1693,7 @@
Out.PadToColumn(50);
Out << "; Error: Block without parent!";
} else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
- // Output predecessors for the block...
+ // Output predecessors for the block.
Out.PadToColumn(50);
Out << ";";
const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
@@ -1748,13 +1731,6 @@
AnnotationWriter->printInfoComment(V, Out);
return;
}
-
- if (V.getType()->isVoidTy()) return;
-
- Out.PadToColumn(50);
- Out << "; <";
- TypePrinter.print(V.getType(), Out);
- Out << "> [#uses=" << V.getNumUses() << ']'; // Output # uses
}
// This member is called for each Instruction in a function.
@@ -1873,6 +1849,8 @@
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
default: Out << " cc" << CI->getCallingConv(); break;
}
@@ -1927,6 +1905,8 @@
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
default: Out << " cc" << II->getCallingConv(); break;
}