[llvm-commits] [llvm] r53163 [5/7] - in /llvm/branches/non-call-eh: ./ autoconf/ bindings/ocaml/llvm/ docs/ docs/CommandGuide/ docs/tutorial/ examples/BrainF/ examples/Fibonacci/ examples/HowToUseJIT/ examples/ModuleMaker/ examples/ParallelJIT/ include/llvm-c/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/Bitcode/ include/llvm/CodeGen/ include/llvm/Debugger/ include/llvm/ExecutionEngine/ include/llvm/Support/ include/llvm/System/ include/llvm/Target/ include/llvm/Transforms/ include/llvm/Transform...
Nick Lewycky
nicholas at mxc.ca
Sun Jul 6 13:45:51 PDT 2008
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.cpp Sun Jul 6 15:45:41 2008
@@ -25,7 +25,6 @@
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -41,7 +40,10 @@
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
-X86TargetLowering::X86TargetLowering(TargetMachine &TM)
+// Forward declarations.
+static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG);
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
@@ -264,6 +266,8 @@
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
@@ -287,18 +291,22 @@
if (!Subtarget->hasSSE2())
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_LCS , MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_LCS , MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_LCS , MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_LCS , MVT::i64, Custom);
+ // Expand certain atomics
+ setOperationAction(ISD::ATOMIC_CMP_SWAP , MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP , MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP , MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP , MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB , MVT::i32, Expand);
- // Use the default ISD::LOCATION, ISD::DECLARE expansion.
- setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
- !Subtarget->isTargetCygMing())
- setOperationAction(ISD::LABEL, MVT::Other, Expand);
+ !Subtarget->isTargetCygMing()) {
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ }
setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
@@ -320,12 +328,14 @@
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
- if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
- else
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -484,49 +494,51 @@
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
- setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
}
if (Subtarget->hasMMX()) {
addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
// FIXME: add MMX packed arithmetic
@@ -574,11 +586,14 @@
AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
@@ -605,6 +620,7 @@
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4f32, Legal);
}
if (Subtarget->hasSSE2()) {
@@ -630,6 +646,12 @@
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Legal);
+
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
@@ -637,13 +659,14 @@
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
- if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
- setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
@@ -658,16 +681,16 @@
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
- setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
- setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
- setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
- setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
- setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
- AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -677,11 +700,13 @@
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
}
if (Subtarget->hasSSE41()) {
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Legal);
// i8 and i16 vectors are custom, because the source register and
// source memory operand types are not the same width. f32 vectors are
@@ -708,6 +733,7 @@
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::STORE);
@@ -715,16 +741,15 @@
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are optimizing for size.
- maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
- maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
- maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
+ maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
allowUnalignedMemoryAccesses = true; // x86 supports it!
setPrefLoopAlignment(16);
}
-MVT::ValueType
-X86TargetLowering::getSetCCResultType(const SDOperand &) const {
+MVT X86TargetLowering::getSetCCResultType(const SDOperand &) const {
return MVT::i8;
}
@@ -768,6 +793,23 @@
return Align;
}
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+/// determining it.
+MVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr) const {
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
+ return MVT::v4i32;
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
+ return MVT::v4f32;
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
+
+
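For orientation, a minimal sketch of how this hook is consumed (illustrative
only; TLI below stands for the target lowering object and is an assumed name,
not part of this commit):

    // SelectionDAG's memcpy/memset expansion asks the target for a preferred
    // access type. With SSE2 and a >= 16-byte copy from a constant source
    // this returns MVT::v4i32, so a 32-byte memcpy becomes two 128-bit
    // load/store pairs instead of four i64 or eight i32 operations.
    MVT VT = TLI.getOptimalMemOpType(/*Size=*/32, /*Align=*/16,
                                     /*isSrcConst=*/true, /*isSrcStr=*/false);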
/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
@@ -785,19 +827,6 @@
#include "X86GenCallingConv.inc"
-/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
-/// exists skip possible ISD:TokenFactor.
-static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
- if (Chain.getOpcode() == X86ISD::TAILCALL) {
- return Chain;
- } else if (Chain.getOpcode() == ISD::TokenFactor) {
- if (Chain.getNumOperands() &&
- Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
- return Chain.getOperand(0);
- }
- return Chain;
-}
-
/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
@@ -818,7 +847,7 @@
SDOperand Chain = Op.getOperand(0);
// Handle tail call return.
- Chain = GetPossiblePreceedingTailCall(Chain);
+ Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
if (Chain.getOpcode() == X86ISD::TAILCALL) {
SDOperand TailCall = Chain;
SDOperand TargetAddress = TailCall.getOperand(1);
@@ -871,10 +900,29 @@
// Don't emit a copytoreg.
continue;
}
-
+
Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() &&
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDOperand Val = DAG.getCopyFromReg(Chain, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, X86::RAX, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
RetOps[0] = Chain; // Update chain.
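A minimal C-level case that exercises this path (illustrative only, not part
of the commit):

    // x86-64 ABI: a struct too large for registers is returned through a
    // hidden pointer argument, and the callee must return that same pointer
    // in %rax.
    struct Big { long a, b, c, d; };
    struct Big make(void) {
      struct Big t = { 1, 2, 3, 4 };
      return t;  // the sret pointer saved in the entry block is copied into
    }            // %rax here, which is what the block above implements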
@@ -905,7 +953,7 @@
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
- MVT::ValueType CopyVT = RVLocs[i].getValVT();
+ MVT CopyVT = RVLocs[i].getValVT();
// If this is a call to a function that returns an fp value on the floating
// point stack, but where we prefer to use the value in xmm registers, copy
@@ -930,11 +978,11 @@
ResultVals.push_back(Val);
}
-
+
// Merge everything together with a MERGE_VALUES node.
ResultVals.push_back(Chain);
- return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
- &ResultVals[0], ResultVals.size()).Val;
+ return DAG.getMergeValues(TheCall->getVTList(), &ResultVals[0],
+ ResultVals.size()).Val;
}
@@ -1035,27 +1083,6 @@
return None;
}
-/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
-/// possibly be overwritten when lowering the outgoing arguments in a tail
-/// call. Currently the implementation of this call is very conservative and
-/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
-/// virtual registers would be overwritten by direct lowering.
-static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op,
- MachineFrameInfo * MFI) {
- RegisterSDNode * OpReg = NULL;
- FrameIndexSDNode * FrameIdxNode = NULL;
- int FrameIdx = 0;
- if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
- (Op.getOpcode()== ISD::CopyFromReg &&
- (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
- (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
- (Op.getOpcode() == ISD::LOAD &&
- (FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op.getOperand(1))) &&
- (MFI->isFixedObjectIndex((FrameIdx = FrameIdxNode->getIndex()))) &&
- (MFI->getObjectOffset(FrameIdx) >= 0)))
- return true;
- return false;
-}
/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
/// in a register before calling.
@@ -1065,7 +1092,6 @@
Subtarget->isPICStyleGOT();
}
-
/// CallRequiresFnAddressInReg - Check whether the call requires the function
/// address to be loaded in a register.
bool
@@ -1075,33 +1101,6 @@
Subtarget->isPICStyleGOT();
}
-/// CopyTailCallClobberedArgumentsToVRegs - Create virtual registers for all
-/// arguments to force loading and guarantee that arguments sourcing from
-/// incomming parameters are not overwriting each other.
-static SDOperand
-CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain,
- SmallVector<std::pair<unsigned, SDOperand>, 8> &TailCallClobberedVRegs,
- SelectionDAG &DAG,
- MachineFunction &MF,
- const TargetLowering * TL) {
-
- SDOperand InFlag;
- for (unsigned i = 0, e = TailCallClobberedVRegs.size(); i != e; i++) {
- SDOperand Arg = TailCallClobberedVRegs[i].second;
- unsigned Idx = TailCallClobberedVRegs[i].first;
- unsigned VReg =
- MF.getRegInfo().
- createVirtualRegister(TL->getRegClassFor(Arg.getValueType()));
- Chain = DAG.getCopyToReg(Chain, VReg, Arg, InFlag);
- InFlag = Chain.getValue(1);
- Arg = DAG.getCopyFromReg(Chain, VReg, Arg.getValueType(), InFlag);
- TailCallClobberedVRegs[i] = std::make_pair(Idx, Arg);
- Chain = Arg.getValue(1);
- InFlag = Arg.getValue(2);
- }
- return Chain;
-}
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
@@ -1111,8 +1110,7 @@
ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
- /*AlwaysInline=*/true,
- NULL, 0, NULL, 0);
+ /*AlwaysInline=*/true, NULL, 0, NULL, 0);
}
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
@@ -1130,7 +1128,7 @@
// changed with more analysis.
// In the case of tail call optimization, mark all arguments mutable, since
// they could be overwritten when the arguments of a tail call are lowered.
- int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
@@ -1158,6 +1156,7 @@
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
unsigned CC = MF.getFunction()->getCallingConv();
bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && CC == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
@@ -1178,7 +1177,7 @@
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
- MVT::ValueType RegVT = VA.getLocVT();
+ MVT RegVT = VA.getLocVT();
TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
@@ -1188,13 +1187,25 @@
RC = X86::FR32RegisterClass;
else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
- else {
- assert(MVT::isVector(RegVT));
- if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
- RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
- RegVT = MVT::i64;
- } else
- RC = X86::VR128RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ RC = X86::VR128RegisterClass;
+ else if (RegVT.isVector()) {
+ assert(RegVT.getSizeInBits() == 64);
+ if (!Is64Bit)
+ RC = X86::VR64RegisterClass; // MMX values are passed in MMXs.
+ else {
+ // Darwin calling convention passes MMX values in either GPRs or
+ // XMMs in x86-64. Other targets pass them in memory.
+ if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
+ RC = X86::VR128RegisterClass; // MMX values are passed in XMMs.
+ RegVT = MVT::v2i64;
+ } else {
+ RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs.
+ RegVT = MVT::i64;
+ }
+ }
+ } else {
+ assert(0 && "Unknown argument type!");
}
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
@@ -1214,9 +1225,15 @@
ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
// Handle MMX values passed in GPRs.
- if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
- MVT::getSizeInBits(RegVT) == 64)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
+ if (Is64Bit && RegVT != VA.getLocVT()) {
+ if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
+ else if (RC == X86::VR128RegisterClass) {
+ ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i64, ArgValue,
+ DAG.getConstant(0, MVT::i64));
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
+ }
+ }
ArgValues.push_back(ArgValue);
} else {
@@ -1225,6 +1242,21 @@
}
}
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDOperand Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]);
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root);
+ }
+
unsigned StackSize = CCInfo.getNextStackOffset();
// align stack specially for tail calls
if (CC == CallingConv::Fast)
@@ -1237,30 +1269,52 @@
VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
}
if (Is64Bit) {
- static const unsigned GPR64ArgRegs[] = {
- X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+ // FIXME: We should really autogenerate these arrays
+ static const unsigned GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegsWin64[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
- static const unsigned XMMArgRegs[] = {
+ static const unsigned GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
-
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
-
+ const unsigned *GPR64ArgRegs, *XMMArgRegs;
+
+ if (IsWin64) {
+ TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+ GPR64ArgRegs = GPR64ArgRegsWin64;
+ XMMArgRegs = XMMArgRegsWin64;
+ } else {
+ TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+ GPR64ArgRegs = GPR64ArgRegs64Bit;
+ XMMArgRegs = XMMArgRegs64Bit;
+ }
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+ TotalNumIntRegs);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
+ TotalNumXMMRegs);
+
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
// may be loaded by dereferencing the result of va_next.
VarArgsGPOffset = NumIntRegs * 8;
- VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
- RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
-
+ VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
+ RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
+ TotalNumXMMRegs * 16, 16);
+
// Store the integer parameter registers.
SmallVector<SDOperand, 8> MemOps;
SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(VarArgsGPOffset));
- for (; NumIntRegs != 6; ++NumIntRegs) {
+ for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
@@ -1272,11 +1326,11 @@
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getIntPtrConstant(8));
}
-
+
// Now store the XMM (fp + vector) parameter registers.
FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(VarArgsFPOffset));
- for (; NumXMMRegs != 8; ++NumXMMRegs) {
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
X86::VR128RegisterClass);
SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
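The save-area sizing works out as follows (a sketch restating the constants
from this hunk; the Win64 numbers are simply what the arrays above imply):

    // SysV x86-64: 6 GPRs * 8 + 8 XMMs * 16 = 176-byte register save area.
    // Win64:       4 GPRs * 8 + 4 XMMs * 16 =  96-byte register save area.
    unsigned SaveArea = TotalNumIntRegs * 8 + TotalNumXMMRegs * 16;
    // va_arg then starts reading at gp_offset = NumIntRegs * 8 and
    // fp_offset = TotalNumIntRegs * 8 + NumXMMRegs * 16, matching the
    // VarArgsGPOffset / VarArgsFPOffset computations above.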
@@ -1325,8 +1379,8 @@
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
+ return DAG.getMergeValues(Op.Val->getVTList(), &ArgValues[0],
+ ArgValues.size()).getValue(Op.ResNo);
}
SDOperand
@@ -1359,7 +1413,7 @@
if (!IsTailCall || FPDiff==0) return Chain;
// Adjust the Return address stack slot.
- MVT::ValueType VT = getPointerTy();
+ MVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, Chain,OutRetAddr, NULL, 0);
@@ -1378,72 +1432,15 @@
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
+ MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
PseudoSourceValue::getFixedStack(), NewReturnAddrFI);
return Chain;
}
-/// CopyTailCallByValClobberedRegToVirtReg - Copy arguments with register target
-/// which might be overwritten by later byval tail call lowering to a virtual
-/// register.
-bool
-X86TargetLowering::CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
- SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
- SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
- std::pair<unsigned, SDOperand> &RegToPass,
- SDOperand &OutChain,
- SDOperand &OutFlag,
- MachineFunction &MF,
- SelectionDAG & DAG) {
- if (!containsByValArg) return false;
-
- std::pair<unsigned, unsigned> ArgRegVReg;
- MVT::ValueType VT = RegToPass.second.getValueType();
-
- ArgRegVReg.first = RegToPass.first;
- ArgRegVReg.second = MF.getRegInfo().createVirtualRegister(getRegClassFor(VT));
-
- // Copy Argument to virtual register.
- OutChain = DAG.getCopyToReg(OutChain, ArgRegVReg.second,
- RegToPass.second, OutFlag);
- OutFlag = OutChain.getValue(1);
- // Remember virtual register and type.
- TailCallByValClobberedVRegs.push_back(ArgRegVReg);
- TailCallByValClobberedVRegTypes.push_back(VT);
- return true;
-}
-
-
-/// RestoreTailCallByValClobberedReg - Restore registers which were saved to
-/// virtual registers to prevent tail call byval lowering from overwriting
-/// parameter registers.
-static SDOperand
-RestoreTailCallByValClobberedRegs(SelectionDAG & DAG, SDOperand Chain,
- SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
- SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes) {
- if (TailCallByValClobberedVRegs.size()==0) return Chain;
-
- SmallVector<SDOperand, 8> RegOpChains;
- for (unsigned i = 0, e=TailCallByValClobberedVRegs.size(); i != e; i++) {
- SDOperand InFlag;
- unsigned DestReg = TailCallByValClobberedVRegs[i].first;
- unsigned VirtReg = TailCallByValClobberedVRegs[i].second;
- MVT::ValueType VT = TailCallByValClobberedVRegTypes[i];
- SDOperand Tmp = DAG.getCopyFromReg(Chain, VirtReg, VT, InFlag);
- Chain = DAG.getCopyToReg(Chain, DestReg, Tmp, InFlag);
- RegOpChains.push_back(Chain);
- }
- if (!RegOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &RegOpChains[0], RegOpChains.size());
- return Chain;
-}
-
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo * MFI = MF.getFrameInfo();
SDOperand Chain = Op.getOperand(0);
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
@@ -1494,18 +1491,11 @@
FPDiff);
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<std::pair<unsigned, SDOperand>, 8> TailCallClobberedVRegs;
-
SmallVector<SDOperand, 8> MemOpChains;
-
SDOperand StackPtr;
- bool containsTailCallByValArg = false;
- SmallVector<std::pair<unsigned, unsigned>, 8> TailCallByValClobberedVRegs;
- SmallVector<MVT::ValueType, 8> TailCallByValClobberedVRegTypes;
-
- // Walk the register/memloc assignments, inserting copies/loads. For tail
- // calls, remember all arguments for later special lowering.
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
@@ -1528,6 +1518,30 @@
}
if (VA.isRegLoc()) {
+ if (Is64Bit) {
+ MVT RegVT = VA.getLocVT();
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ switch (VA.getLocReg()) {
+ default:
+ break;
+ case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
+ case X86::R8: {
+ // Special case: passing MMX values in GPR registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
+ break;
+ }
+ case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
+ case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Arg);
+ Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
+ DAG.getNode(ISD::UNDEF, MVT::v2i64), Arg,
+ getMOVLMask(2, DAG));
+ break;
+ }
+ }
+ }
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
if (!IsTailCall || (IsTailCall && isByVal)) {
@@ -1537,10 +1551,6 @@
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
Arg));
- // Remember fact that this call contains byval arguments.
- containsTailCallByValArg |= IsTailCall && isByVal;
- } else if (IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
- TailCallClobberedVRegs.push_back(std::make_pair(i,Arg));
}
}
}
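A sketch of the node shapes the switch above builds for a 64-bit MMX argument
on x86-64 (DAG shapes only; getMOVLMask is the helper forward-declared at the
top of this file):

    // GPR slot (%rdi..%r8):
    //   Arg' = (bitcast Arg to i64)
    // XMM slot (%xmm0..%xmm7):
    //   Arg' = vector_shuffle (undef v2i64),
    //                         (scalar_to_vector v2i64 (bitcast Arg to i64)),
    //                         getMOVLMask(2, DAG)  // mask <2,1>: lane 0 from
    //                                              // Arg, lane 1 undef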
@@ -1552,21 +1562,14 @@
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- // Tail call byval lowering might overwrite argument registers so arguments
- // passed to be copied to a virtual register for
- // later processing.
- if (CopyTailCallByValClobberedRegToVirtReg(containsTailCallByValArg,
- TailCallByValClobberedVRegs,
- TailCallByValClobberedVRegTypes,
- RegsToPass[i], Chain, InFlag, MF,
- DAG))
- continue;
-
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
+ // Tail call byval lowering might overwrite argument registers, so in the
+ // case of tail call optimization the copies to registers are lowered later.
+ if (!IsTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
@@ -1600,7 +1603,8 @@
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
-
+
+ // FIXME: Verify this on Win64
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
@@ -1621,10 +1625,6 @@
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDOperand();
-
- Chain = CopyTailCallClobberedArgumentsToVRegs(Chain, TailCallClobberedVRegs,
- DAG, MF, this);
-
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc()) {
@@ -1635,21 +1635,10 @@
cast<ARG_FLAGSSDNode>(FlagsOp)->getArgFlags();
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
- uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
FIN = DAG.getFrameIndex(FI, getPointerTy());
- // Find virtual register for this argument.
- bool Found=false;
- for (unsigned idx=0, e= TailCallClobberedVRegs.size(); idx < e; idx++)
- if (TailCallClobberedVRegs[idx].first==i) {
- Arg = TailCallClobberedVRegs[idx].second;
- Found=true;
- break;
- }
- assert(IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)==false ||
- (Found==true && "No corresponding Argument was found"));
-
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDOperand Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
@@ -1672,10 +1661,13 @@
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
- // Restore byval lowering clobbered registers.
- Chain = RestoreTailCallByValClobberedRegs(DAG, Chain,
- TailCallByValClobberedVRegs,
- TailCallByValClobberedVRegTypes);
+ // Copy arguments to their registers.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDOperand();
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
@@ -1853,15 +1845,7 @@
if (!PerformTailCallOpt)
return false;
- // Check whether CALL node immediatly preceeds the RET node and whether the
- // return uses the result of the node or is a void return.
- unsigned NumOps = Ret.getNumOperands();
- if ((NumOps == 1 &&
- (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
- Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
- (NumOps > 1 &&
- Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
- Ret.getOperand(1) == SDOperand(Call.Val,0))) {
+ if (CheckTailCallReturnConstraints(Call, Ret)) {
MachineFunction &MF = DAG.getMachineFunction();
unsigned CallerCC = MF.getFunction()->getCallingConv();
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
@@ -2588,9 +2572,9 @@
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
SDOperand &V2, SDOperand &Mask,
SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType MaskVT = Mask.getValueType();
- MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
+ MVT VT = Op.getValueType();
+ MVT MaskVT = Mask.getValueType();
+ MVT EltVT = MaskVT.getVectorElementType();
unsigned NumElems = Mask.getNumOperands();
SmallVector<SDOperand, 8> MaskVec;
@@ -2617,8 +2601,8 @@
/// the two vector operands have swapped position.
static
SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
- MVT::ValueType MaskVT = Mask.getValueType();
- MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
+ MVT MaskVT = Mask.getValueType();
+ MVT EltVT = MaskVT.getVectorElementType();
unsigned NumElems = Mask.getNumOperands();
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i != NumElems; ++i) {
@@ -2656,11 +2640,16 @@
}
/// isScalarLoadToVector - Returns true if the node is a scalar load that
-/// is promoted to a vector.
-static inline bool isScalarLoadToVector(SDNode *N) {
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
N = N->getOperand(0).Val;
- return ISD::isNON_EXTLoad(N);
+ if (ISD::isNON_EXTLoad(N)) {
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+ }
}
return false;
}
@@ -2772,30 +2761,35 @@
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
- assert(MVT::isVector(VT) && "Expected a vector type");
+static SDOperand getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG) {
+ assert(VT.isVector() && "Expected a vector type");
// Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
// type. This ensures they get CSE'd.
- SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
SDOperand Vec;
- if (MVT::getSizeInBits(VT) == 64) // MMX
+ if (VT.getSizeInBits() == 64) { // MMX
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
- else // SSE
+ } else if (HasSSE2) { // SSE2
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDOperand Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}
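A sketch of why the new HasSSE2 flag matters, restating the three branches
above:

    // Zero vectors are built in one canonical type per width so they CSE:
    //   MMX  (64-bit)  : BUILD_VECTOR v2i32 (0,0)        -> MMX pxor
    //   SSE2 (128-bit) : BUILD_VECTOR v4i32 (0,0,0,0)    -> pxor
    //   SSE1 only      : BUILD_VECTOR v4f32 (+0.0 x 4)   -> xorps
    // then bitcast to the requested VT; the SSE1 branch avoids a v4i32
    // BUILD_VECTOR that would be illegal without SSE2.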
/// getOnesVector - Returns a vector of specified type with all bits set.
///
-static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
- assert(MVT::isVector(VT) && "Expected a vector type");
+static SDOperand getOnesVector(MVT VT, SelectionDAG &DAG) {
+ assert(VT.isVector() && "Expected a vector type");
// Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
// type. This ensures they get CSE'd.
SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDOperand Vec;
- if (MVT::getSizeInBits(VT) == 64) // MMX
+ if (VT.getSizeInBits() == 64) // MMX
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
else // SSE
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -2832,8 +2826,8 @@
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 8> MaskVec;
MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
@@ -2845,8 +2839,8 @@
/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
MaskVec.push_back(DAG.getConstant(i, BaseVT));
@@ -2858,8 +2852,8 @@
/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
unsigned Half = NumElems/2;
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i != Half; ++i) {
@@ -2874,8 +2868,8 @@
/// elements in place.
static SDOperand getSwapEltZeroMask(unsigned NumElems, unsigned DestElt,
SelectionDAG &DAG) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT BaseVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 8> MaskVec;
// Element #0 of the result gets the elt we are replacing.
MaskVec.push_back(DAG.getConstant(DestElt, BaseVT));
@@ -2886,8 +2880,8 @@
/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG, bool HasSSE2) {
- MVT::ValueType PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
- MVT::ValueType VT = Op.getValueType();
+ MVT PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
+ MVT VT = Op.getValueType();
if (PVT == VT)
return Op;
SDOperand V1 = Op.getOperand(0);
@@ -2900,7 +2894,7 @@
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
NumElems >>= 1;
}
- Mask = getZeroVector(MVT::v4i32, DAG);
+ Mask = getZeroVector(MVT::v4i32, true, DAG);
}
V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
@@ -2914,12 +2908,14 @@
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, unsigned Idx,
- bool isZero, SelectionDAG &DAG) {
- MVT::ValueType VT = V2.getValueType();
- SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
- unsigned NumElems = MVT::getVectorNumElements(V2.getValueType());
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
+ bool isZero, bool HasSSE2,
+ SelectionDAG &DAG) {
+ MVT VT = V2.getValueType();
+ SDOperand V1 = isZero
+ ? getZeroVector(VT, HasSSE2, DAG) : DAG.getNode(ISD::UNDEF, VT);
+ unsigned NumElems = V2.getValueType().getVectorNumElements();
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT EVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
if (i == Idx) // If this is the insertion idx, put the low elt of V2 here.
@@ -2931,6 +2927,70 @@
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
+/// getNumOfConsecutiveZeros - Return the number of consecutive zero elements
+/// at the low or high end of a shuffle result.
+static
+unsigned getNumOfConsecutiveZeros(SDOperand Op, SDOperand Mask,
+ unsigned NumElems, bool Low,
+ SelectionDAG &DAG) {
+ unsigned NumZeros = 0;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ unsigned Index = Low ? i : NumElems-i-1;
+ SDOperand Idx = Mask.getOperand(Index);
+ if (Idx.getOpcode() == ISD::UNDEF) {
+ ++NumZeros;
+ continue;
+ }
+ SDOperand Elt = DAG.getShuffleScalarElt(Op.Val, Index);
+ if (Elt.Val && isZeroNode(Elt))
+ ++NumZeros;
+ else
+ break;
+ }
+ return NumZeros;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(SDOperand Op, SDOperand Mask, SelectionDAG &DAG,
+ bool &isLeft, SDOperand &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = Mask.getNumOperands();
+
+ isLeft = true;
+ unsigned NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG);
+ if (!NumZeros) {
+ isLeft = false;
+ NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG);
+ if (!NumZeros)
+ return false;
+ }
+
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+ for (unsigned i = NumZeros; i < NumElems; ++i) {
+ unsigned Val = isLeft ? (i - NumZeros) : i;
+ SDOperand Idx = Mask.getOperand(isLeft ? i : (i - NumZeros));
+ if (Idx.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
+ if (Index < NumElems)
+ SeenV1 = true;
+ else {
+ Index -= NumElems;
+ SeenV2 = true;
+ }
+ if (Index != Val)
+ return false;
+ }
+ if (SeenV1 && SeenV2)
+ return false;
+
+ ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1);
+ ShAmt = NumZeros;
+ return true;
+}
+
+
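A worked example of the predicate above (sketch):

    // A v4i32 shuffle of (V1, zero vector) with mask <4,0,1,2>:
    //   element 0 comes from the zero vector       -> NumZeros = 1, isLeft
    //   elements 1..3 are V1[0], V1[1], V1[2], all from V1 and in order
    // so isVectorShift returns isLeft = true, ShVal = V1, ShAmt = 1: the
    // shuffle is a 128-bit logical left shift by one 32-bit element.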
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
@@ -2945,7 +3005,7 @@
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, DAG);
+ V = getZeroVector(MVT::v8i16, true, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
@@ -2990,7 +3050,7 @@
if (isNonZero) {
if (First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, DAG);
+ V = getZeroVector(MVT::v8i16, true, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
@@ -3003,6 +3063,20 @@
return V;
}
+/// getVShift - Return a vector logical shift node.
+///
+static SDOperand getVShift(bool isLeft, MVT VT, SDOperand SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ bool isMMX = VT.getSizeInBits() == 64;
+ MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp);
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(Opc, ShVT, SrcOp,
+ DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+}
+
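Continuing that example, a sketch of what getVShift emits for it:

    // getVShift(true, MVT::v4i32, V1, /*NumBits=*/1 * 32, DAG, TLI) builds
    //   (bitcast v4i32 (X86ISD::VSHL v2i64 (bitcast v2i64 V1), 32))
    // which instruction selection turns into pslldq $4, a whole-register
    // byte shift; a 64-bit (MMX) VT would use v1i64 as the shift type.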
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3015,12 +3089,12 @@
if (ISD::isBuildVectorAllOnes(Op.Val))
return getOnesVector(Op.getValueType(), DAG);
- return getZeroVector(Op.getValueType(), DAG);
+ return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG);
}
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
- unsigned EVTBits = MVT::getSizeInBits(EVT);
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ unsigned EVTBits = EVT.getSizeInBits();
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
@@ -3063,14 +3137,15 @@
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle MMX and SSE both.
- MVT::ValueType VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
- MVT::ValueType VecElts = VT == MVT::v2i64 ? 4 : 2;
+ MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+ unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true, DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
@@ -3095,7 +3170,17 @@
(EVT != MVT::i64 || Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
+ return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = VT.getSizeInBits();
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
@@ -3110,9 +3195,10 @@
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT MaskEVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
@@ -3133,8 +3219,17 @@
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
- if (EVTBits == 64)
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDOperand V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true,
+ Subtarget->hasSSE2(), DAG);
+ }
return SDOperand();
+ }
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
@@ -3156,7 +3251,7 @@
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, DAG);
+ V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
@@ -3182,15 +3277,8 @@
}
}
- // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
- // clears the upper bits.
- // FIXME: we can do the same for v4f32 case when we know both parts of
- // the lower half come from scalar_to_vector (loadf32). We should do
- // that in post legalizer dag combiner with target specific hooks.
- if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
- return V[0];
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT EVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 8> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
@@ -3236,9 +3324,9 @@
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
SDOperand NewV;
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
- MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
- MVT::ValueType PtrVT = TLI.getPointerTy();
+ MVT MaskVT = MVT::getIntVectorWithNumElements(8);
+ MVT MaskEVT = MaskVT.getVectorElementType();
+ MVT PtrVT = TLI.getPointerTy();
SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
PermMask.Val->op_end());
@@ -3362,8 +3450,6 @@
continue;
SDOperand Elt = MaskElts[i];
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
- if (EltIdx == i)
- continue;
SDOperand ExtOp = (EltIdx < 8)
? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT))
@@ -3478,23 +3564,23 @@
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
static
SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
- MVT::ValueType VT,
+ MVT VT,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
unsigned NumElems = PermMask.getNumOperands();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
- MVT::ValueType NewVT = MaskVT;
- switch (VT) {
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ MVT NewVT = MaskVT;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
case MVT::v16i8: NewVT = MVT::v4i32; break;
- default: assert(false && "Unexpected!");
}
if (NewWidth == 2) {
- if (MVT::isInteger(VT))
+ if (VT.isInteger())
NewVT = MVT::v2i64;
else
NewVT = MVT::v2f64;
@@ -3526,14 +3612,46 @@
&MaskVec[0], MaskVec.size()));
}
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
+static SDOperand getVZextMovL(MVT VT, MVT OpVT,
+ SDOperand SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.Val, &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
+ SrcOp.getOperand(0).getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
+ DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
+}
+
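A sketch of the node shape this helper returns, and of the PR2108 special
case above:

    // Common case:
    //   (bitcast VT (X86ISD::VZEXT_MOVL OpVT (bitcast OpVT SrcOp)))
    // which selects to movq/movss/movsd forms that zero the upper lanes.
    // The early-out handles a non-load scalar_to_vector of a bitcast integer
    // by switching OpVT to the matching integer vector type, because the
    // register-to-register movss/movsd forms leave the top bits unchanged.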
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
- MVT::ValueType VT = Op.getValueType();
+ MVT VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
- bool isMMX = MVT::getSizeInBits(VT) == 64;
+ bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
@@ -3543,7 +3661,7 @@
return DAG.getNode(ISD::UNDEF, VT);
if (isZeroShuffle(Op.Val))
- return getZeroVector(VT, DAG);
+ return getZeroVector(VT, Subtarget->hasSSE2(), DAG);
if (isIdentityMask(PermMask.Val))
return V1;
@@ -3566,27 +3684,46 @@
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.Val)) {
- SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+ DAG, *this);
if (NewOp.Val) {
SDOperand NewV1 = NewOp.getOperand(0);
SDOperand NewV2 = NewOp.getOperand(1);
SDOperand NewMask = NewOp.getOperand(2);
if (isCommutedMOVL(NewMask.Val, true, false)) {
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
- NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
- NewV1, NewV2, getMOVLMask(2, DAG));
- return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
}
}
} else if (ISD::isBuildVectorAllZeros(V1.Val)) {
- SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+ DAG, *this);
if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
- return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+ DAG, Subtarget);
}
}
- if (X86::isMOVLMask(PermMask.Val))
- return (V1IsUndef) ? V2 : Op;
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDOperand ShVal;
+ bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ MVT EVT = VT.getVectorElementType();
+ ShAmt *= EVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
+ }
+
+ if (X86::isMOVLMask(PermMask.Val)) {
+ if (V1IsUndef)
+ return V2;
+ if (ISD::isBuildVectorAllZeros(V1.Val))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget);
+ return Op;
+ }
if (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
@@ -3599,6 +3736,13 @@
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ MVT EVT = VT.getVectorElementType();
+ ShAmt *= EVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
+ }
+
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
@@ -3679,7 +3823,7 @@
(X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val))) {
- MVT::ValueType RVT = VT;
+ MVT RVT = VT;
if (VT == MVT::v4f32) {
RVT = MVT::v4i32;
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
@@ -3709,8 +3853,8 @@
// Handle all 4 wide cases with a number of shuffles.
if (NumElems == 4 && !isMMX) {
// Don't do this for MMX.
- MVT::ValueType MaskVT = PermMask.getValueType();
- MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ MVT MaskVT = PermMask.getValueType();
+ MVT MaskEVT = MaskVT.getVectorElementType();
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
SmallVector<SDOperand, 8> Mask1(NumElems,
@@ -3817,14 +3961,14 @@
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- if (MVT::getSizeInBits(VT) == 8) {
+ MVT VT = Op.getValueType();
+ if (VT.getSizeInBits() == 8) {
SDOperand Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
- } else if (MVT::getSizeInBits(VT) == 16) {
+ } else if (VT.getSizeInBits() == 16) {
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
@@ -3861,9 +4005,9 @@
return Res;
}
- MVT::ValueType VT = Op.getValueType();
+ MVT VT = Op.getValueType();
// TODO: handle v16i8.
- if (MVT::getSizeInBits(VT) == 16) {
+ if (VT.getSizeInBits() == 16) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
@@ -3872,27 +4016,27 @@
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it matches pextrw, which produces a 32-bit result.
- MVT::ValueType EVT = (MVT::ValueType)(VT+1);
+ MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
- } else if (MVT::getSizeInBits(VT) == 32) {
+ } else if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.
- push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
+ push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
IdxVec.
- push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.
- push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.
- push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDOperand Vec = Op.getOperand(0);
@@ -3900,7 +4044,7 @@
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
- } else if (MVT::getSizeInBits(VT) == 64) {
+ } else if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
@@ -3911,11 +4055,11 @@
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ MVT MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
- IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
+ IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
IdxVec.
- push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
+ push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDOperand Vec = Op.getOperand(0);
@@ -3930,15 +4074,15 @@
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op, SelectionDAG &DAG){
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
- if ((MVT::getSizeInBits(EVT) == 8) || (MVT::getSizeInBits(EVT) == 16)) {
- unsigned Opc = (MVT::getSizeInBits(EVT) == 8) ? X86ISD::PINSRB
+ if ((EVT.getSizeInBits() == 8) || (EVT.getSizeInBits() == 16)) {
+ unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
: X86ISD::PINSRW;
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its
// second argument.
@@ -3964,8 +4108,8 @@
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
@@ -3977,7 +4121,7 @@
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
- if (MVT::getSizeInBits(EVT) == 16) {
+ if (EVT.getSizeInBits() == 16) {
// Transform it so it matches pinsrw, which expects a 16-bit value in a
// GR32 as its second argument.
if (N1.getValueType() != MVT::i32)
@@ -3992,8 +4136,8 @@
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
- MVT::ValueType VT = MVT::v2i32;
- switch (Op.getValueType()) {
+ MVT VT = MVT::v2i32;
+ switch (Op.getValueType().getSimpleVT()) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
@@ -4032,9 +4176,6 @@
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
- // If it's a debug information descriptor, don't mess with it.
- if (DAG.isVerifiedDebugInfoDesc(Op))
- return Result;
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
@@ -4056,10 +4197,10 @@
return Result;
}
-// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDOperand
-LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT::ValueType PtrVT) {
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT) {
SDOperand InFlag;
SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
@@ -4094,11 +4235,43 @@
return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDOperand
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT) {
+ SDOperand InFlag, Chain;
+
+ // emit leaq symbol at TLSGD(%rip), %rdi
+ SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
+ SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getValueType(0),
+ GA->getOffset());
+ SDOperand Ops[] = { DAG.getEntryNode(), TGA};
+ SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2);
+ Chain = Result.getValue(1);
+ InFlag = Result.getValue(2);
+
+ // call ___tls_get_addr. This function receives its argument in
+ // the register RDI.
+ Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag);
+ InFlag = Chain.getValue(1);
+
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Ops1[] = { Chain,
+ DAG.getTargetExternalSymbol("___tls_get_addr",
+ PtrVT),
+ DAG.getRegister(X86::RDI, PtrVT),
+ InFlag };
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4);
+ InFlag = Chain.getValue(1);
+
+ return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag);
+}
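
For context, the TLSADDR node plus the call built here become the usual x86-64 general-dynamic pair: leaq sym@TLSGD(%rip), %rdi followed by a call to the TLS runtime helper, with the address coming back in RAX. A minimal source-level trigger (hypothetical example, not part of this patch) would be:

    // Compile with -fPIC for x86-64 ELF to reach
    // LowerToTLSGeneralDynamicModel64 (hypothetical driver).
    __thread int tls_counter;      // TLS global -> ISD::GlobalTLSAddress

    int bumpTLSCounter() {
      return ++tls_counter;        // address materialized via the GD model
    }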
+
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
-static SDOperand
-LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const MVT::ValueType PtrVT) {
+static SDOperand LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT) {
// Get the Thread Pointer
SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
// emit "addl x at ntpoff,%eax" (local exec) or "addl x at indntpoff,%eax" (initial
@@ -4121,15 +4294,19 @@
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
- assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF and 64-bit targets");
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
// If the relocation model is PIC, use the "General Dynamic" TLS model;
// otherwise use the "Local Exec" TLS model.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
- return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
- else
- return LowerToTLSExecModel(GA, DAG, getPointerTy());
+ if (Subtarget->is64Bit()) {
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+ } else {
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+ else
+ return LowerToTLSExecModel(GA, DAG, getPointerTy());
+ }
}
SDOperand
@@ -4167,8 +4344,8 @@
/// take a 2 x i32 value to shift plus a shift amount.
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
- MVT::ValueType VT = Op.getValueType();
- unsigned VTBits = MVT::getSizeInBits(VT);
+ MVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDOperand ShOpLo = Op.getOperand(0);
SDOperand ShOpHi = Op.getOperand(1);
@@ -4186,7 +4363,6 @@
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt);
}
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDOperand Cond = DAG.getNode(X86ISD::CMP, VT,
@@ -4194,46 +4370,24 @@
SDOperand Hi, Lo;
SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- VTs = DAG.getNodeValueTypes(VT, MVT::Flag);
- SmallVector<SDOperand, 4> Ops;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
+ SDOperand Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+ SDOperand Ops1[4] = { Tmp3, Tmp1, CC, Cond };
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
} else {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
-
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
+ Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
}
- VTs = DAG.getNodeValueTypes(VT, VT);
- Ops.clear();
- Ops.push_back(Lo);
- Ops.push_back(Hi);
- return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
+ SDOperand Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2);
}
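
For readers less familiar with the double-shift idiom the CMOV pair implements: both candidate results are computed unconditionally, and the CMOVs select between them depending on whether the shift amount crossed the word boundary. A pseudo-C sketch for SHL_PARTS with 32-bit words (an illustration under those assumptions, not the DAG code):

    #include <cstdint>

    // Sketch of a 64-bit left shift lowered on x86-32. Hardware shifts mask
    // the amount to 5 bits, so amounts >= 32 are handled by the select.
    static void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Amt,
                           uint32_t &OutLo, uint32_t &OutHi) {
      unsigned A = Amt & 31;
      uint32_t ShldHi = (Hi << A) | (A ? (Lo >> (32 - A)) : 0); // shld hi,lo,A
      uint32_t ShlLo  = Lo << A;                                // shl  lo,A
      if (Amt & 32) {        // amount crossed the word boundary
        OutHi = ShlLo;
        OutLo = 0;
      } else {
        OutHi = ShldHi;
        OutLo = ShlLo;
      }
    }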
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
- assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
+ MVT SrcVT = Op.getOperand(0).getValueType();
+ assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; caller falls through into that case.
@@ -4243,7 +4397,7 @@
Subtarget->is64Bit())
return SDOperand();
- unsigned Size = MVT::getSizeInBits(SrcVT)/8;
+ unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
@@ -4293,7 +4447,8 @@
std::pair<SDOperand,SDOperand> X86TargetLowering::
FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
+ assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&
+ Op.getValueType().getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
// These are really Legal.
@@ -4308,11 +4463,11 @@
// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
- unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
+ unsigned MemSize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
- switch (Op.getValueType()) {
+ switch (Op.getValueType().getSimpleVT()) {
default: assert(0 && "Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
@@ -4355,19 +4510,23 @@
std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG);
SDOperand FIST = Vals.first, StackSlot = Vals.second;
if (FIST.Val == 0) return 0;
-
- // Return an i64 load from the stack slot.
- SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0);
- // Use a MERGE_VALUES node to drop the chain result value.
- return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val;
-}
+ MVT VT = N->getValueType(0);
+
+ // Return a load from the stack slot.
+ SDOperand Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0);
+
+ // Use MERGE_VALUES to drop the chain result value and get a node with one
+ // result. This requires turning off getMergeValues simplification, since
+ // otherwise it will give us Res back.
+ return DAG.getMergeValues(&Res, 1, false).Val;
+}
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType EltVT = VT;
- if (MVT::isVector(VT))
- EltVT = MVT::getVectorElementType(VT);
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
@@ -4389,12 +4548,12 @@
}
SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType EltVT = VT;
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT;
unsigned EltNum = 1;
- if (MVT::isVector(VT)) {
- EltVT = MVT::getVectorElementType(VT);
- EltNum = MVT::getVectorNumElements(VT);
+ if (VT.isVector()) {
+ EltVT = VT.getVectorElementType();
+ EltNum = VT.getVectorNumElements();
}
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
@@ -4413,7 +4572,7 @@
SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
- if (MVT::isVector(VT)) {
+ if (VT.isVector()) {
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(ISD::XOR, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
@@ -4426,16 +4585,16 @@
SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
SDOperand Op0 = Op.getOperand(0);
SDOperand Op1 = Op.getOperand(1);
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType SrcVT = Op1.getValueType();
+ MVT VT = Op.getValueType();
+ MVT SrcVT = Op1.getValueType();
// If second operand is smaller, extend it first.
- if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
+ if (SrcVT.bitsLT(VT)) {
Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
SrcVT = VT;
}
// And if it is bigger, shrink it first.
- if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+ if (SrcVT.bitsGT(VT)) {
Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1));
SrcVT = VT;
}
@@ -4462,7 +4621,7 @@
SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
- if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+ if (SrcVT.bitsGT(VT)) {
// Op0 is MVT::f32, Op1 is MVT::f64.
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
@@ -4501,7 +4660,7 @@
SDOperand Op1 = Op.getOperand(1);
SDOperand CC = Op.getOperand(2);
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
unsigned X86CC;
if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
@@ -4549,10 +4708,10 @@
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- MVT::ValueType VT = Op.getValueType();
+ MVT VT = Op.getValueType();
bool IllegalFPCMov = false;
- if (MVT::isFloatingPoint(VT) && !MVT::isVector(VT) &&
+ if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
@@ -4569,7 +4728,7 @@
Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
}
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
+ const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(),
MVT::Flag);
SmallVector<SDOperand, 4> Ops;
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
@@ -4625,16 +4784,18 @@
SelectionDAG &DAG) {
assert(Subtarget->isTargetCygMing() &&
"This should be used only on Cygwin/Mingw targets");
-
+
// Get the inputs.
SDOperand Chain = Op.getOperand(0);
SDOperand Size = Op.getOperand(1);
// FIXME: Ensure alignment here
SDOperand Flag;
-
- MVT::ValueType IntPtr = getPointerTy();
- MVT::ValueType SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ MVT IntPtr = getPointerTy();
+ MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0));
Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
@@ -4643,17 +4804,20 @@
SDOperand Ops[] = { Chain,
DAG.getTargetExternalSymbol("_alloca", IntPtr),
DAG.getRegister(X86::EAX, IntPtr),
+ DAG.getRegister(X86StackPtr, SPTy),
Flag };
- Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 5);
Flag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(0),
+ DAG.getIntPtrConstant(0),
+ Flag);
+
Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
-
- std::vector<MVT::ValueType> Tys;
- Tys.push_back(SPTy);
- Tys.push_back(MVT::Other);
+
SDOperand Ops1[2] = { Chain.getValue(0), Chain };
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
+ return DAG.getMergeValues(Ops1, 2);
}
SDOperand
@@ -4661,7 +4825,7 @@
SDOperand Chain,
SDOperand Dst, SDOperand Src,
SDOperand Size, unsigned Align,
- const Value *DstSV, uint64_t DstOff) {
+ const Value *DstSV, uint64_t DstSVOff) {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
/// If not DWORD aligned or size is more than the threshold, call the library.
@@ -4676,7 +4840,7 @@
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
if (const char *bzeroEntry =
V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
- MVT::ValueType IntPtr = getPointerTy();
+ MVT IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -4698,7 +4862,7 @@
uint64_t SizeVal = ConstantSize->getValue();
SDOperand InFlag(0, 0);
- MVT::ValueType AVT;
+ MVT AVT;
SDOperand Count;
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
@@ -4732,8 +4896,8 @@
break;
}
- if (AVT > MVT::i8) {
- unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
Count = DAG.getIntPtrConstant(SizeVal / UBytes);
BytesLeft = SizeVal % UBytes;
}
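
A quick worked example of the split computed above, assuming AVT ended up as MVT::i32 for a 13-byte memset (illustrative numbers):

    // UBytes    = 32 / 8 = 4
    // Count     = 13 / 4 = 3   // three dword stores via rep;stos
    // BytesLeft = 13 % 4 = 1   // finished by the trailing memset below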
@@ -4765,7 +4929,7 @@
if (TwoRepStos) {
InFlag = Chain.getValue(1);
Count = Size;
- MVT::ValueType CVT = Count.getValueType();
+ MVT CVT = Count.getValueType();
SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
@@ -4780,15 +4944,15 @@
} else if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
- MVT::ValueType AddrVT = Dst.getValueType();
- MVT::ValueType SizeVT = Size.getValueType();
+ MVT AddrVT = Dst.getValueType();
+ MVT SizeVT = Size.getValueType();
Chain = DAG.getMemset(Chain,
DAG.getNode(ISD::ADD, AddrVT, Dst,
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
- Align, DstSV, Offset);
+ Align, DstSV, DstSVOff + Offset);
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -4801,8 +4965,8 @@
SDOperand Dst, SDOperand Src,
SDOperand Size, unsigned Align,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstOff,
- const Value *SrcSV, uint64_t SrcOff){
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
// This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
@@ -4813,9 +4977,7 @@
if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
return SDOperand();
- SmallVector<SDOperand, 4> Results;
-
- MVT::ValueType AVT;
+ MVT AVT;
unsigned BytesLeft = 0;
if (Align >= 8 && Subtarget->is64Bit())
AVT = MVT::i64;
@@ -4826,7 +4988,7 @@
else
AVT = MVT::i8;
- unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;
SDOperand Count = DAG.getIntPtrConstant(CountVal);
BytesLeft = SizeVal % UBytes;
@@ -4847,25 +5009,25 @@
Ops.push_back(Chain);
Ops.push_back(DAG.getValueType(AVT));
Ops.push_back(InFlag);
- Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()));
+ SDOperand RepMovs = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
+ SmallVector<SDOperand, 4> Results;
+ Results.push_back(RepMovs);
if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
- MVT::ValueType DstVT = Dst.getValueType();
- MVT::ValueType SrcVT = Src.getValueType();
- MVT::ValueType SizeVT = Size.getValueType();
-
- Results.push_back(DAG.getMemcpy(Chain,
+ MVT DstVT = Dst.getValueType();
+ MVT SrcVT = Src.getValueType();
+ MVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain,
DAG.getNode(ISD::ADD, DstVT, Dst,
- DAG.getConstant(Offset,
- DstVT)),
+ DAG.getConstant(Offset, DstVT)),
DAG.getNode(ISD::ADD, SrcVT, Src,
- DAG.getConstant(Offset,
- SrcVT)),
+ DAG.getConstant(Offset, SrcVT)),
DAG.getConstant(BytesLeft, SizeVT),
Align, AlwaysInline,
- DstSV, Offset, SrcSV, Offset));
+ DstSV, DstSVOff + Offset,
+ SrcSV, SrcSVOff + Offset));
}
return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size());
@@ -4886,8 +5048,7 @@
DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1)
};
- Tys = DAG.getVTList(MVT::i64, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
+ return DAG.getMergeValues(Ops, 2).Val;
}
SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
@@ -4899,8 +5060,7 @@
// Use a MERGE_VALUES to return the value and chain.
Ops[1] = edx.getValue(1);
- Tys = DAG.getVTList(MVT::i64, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
+ return DAG.getMergeValues(Ops, 2).Val;
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
@@ -4947,6 +5107,18 @@
return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}
+SDOperand X86TargetLowering::LowerVAARG(SDOperand Op, SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand SrcPtr = Op.getOperand(1);
+ SDOperand SrcSV = Op.getOperand(2);
+
+ assert(0 && "VAArgInst is not yet implemented for x86-64!");
+ abort();
+ return SDOperand();
+}
+
SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
@@ -4966,7 +5138,7 @@
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
switch (IntNo) {
default: return SDOperand(); // Don't custom lower most intrinsics.
- // Comparison intrinsics.
+ // Comparison intrinsics.
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse_comile_ss:
@@ -5067,6 +5239,95 @@
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
}
+
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDOperand ShAmt = Op.getOperand(2);
+ if (isa<ConstantSDNode>(ShAmt))
+ return SDOperand();
+
+ unsigned NewIntNo = 0;
+ MVT ShAmtVT = MVT::v4i32;
+ switch (IntNo) {
+ case Intrinsic::x86_sse2_pslli_w:
+ NewIntNo = Intrinsic::x86_sse2_psll_w;
+ break;
+ case Intrinsic::x86_sse2_pslli_d:
+ NewIntNo = Intrinsic::x86_sse2_psll_d;
+ break;
+ case Intrinsic::x86_sse2_pslli_q:
+ NewIntNo = Intrinsic::x86_sse2_psll_q;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ break;
+ case Intrinsic::x86_sse2_psrli_d:
+ NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ break;
+ case Intrinsic::x86_sse2_psrli_q:
+ NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ NewIntNo = Intrinsic::x86_sse2_psra_w;
+ break;
+ case Intrinsic::x86_sse2_psrai_d:
+ NewIntNo = Intrinsic::x86_sse2_psra_d;
+ break;
+ default: {
+ ShAmtVT = MVT::v2i32;
+ switch (IntNo) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: abort(); // Can't reach here.
+ }
+ break;
+ }
+ }
+ MVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt));
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(NewIntNo, MVT::i32),
+ Op.getOperand(1), ShAmt);
+ }
}
}
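
In effect this rewrites an immediate-count shift intrinsic whose count turned out to be a variable into the register-count form, splatting the count through SCALAR_TO_VECTOR first. At the source level the same distinction looks like this (hypothetical illustration using the SSE2 intrinsics header):

    #include <emmintrin.h>

    // With a non-constant count, only the register-count form (psll.d) is
    // usable; the count rides in the low bits of an XMM register, which is
    // what the SCALAR_TO_VECTOR + BIT_CONVERT above materializes.
    __m128i shiftLeftVar(__m128i X, int N) {
      return _mm_sll_epi32(X, _mm_cvtsi32_si128(N));
    }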
@@ -5143,10 +5404,8 @@
const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r);
const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
- const unsigned char N86R10 =
- ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
- const unsigned char N86R11 =
- ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
+ const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
+ const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
@@ -5182,7 +5441,7 @@
SDOperand Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) };
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
+ return DAG.getMergeValues(Ops, 2);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
@@ -5233,8 +5492,7 @@
Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
- const unsigned char N86Reg =
- ((const X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
+ const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, TrmpAddr, 0);
@@ -5251,7 +5509,7 @@
SDOperand Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
+ return DAG.getMergeValues(Ops, 2);
}
}
@@ -5279,7 +5537,7 @@
const TargetMachine &TM = MF.getTarget();
const TargetFrameInfo &TFI = *TM.getFrameInfo();
unsigned StackAlignment = TFI.getStackAlignment();
- MVT::ValueType VT = Op.getValueType();
+ MVT VT = Op.getValueType();
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
@@ -5311,14 +5569,14 @@
DAG.getConstant(3, MVT::i16));
- return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}
SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType OpVT = VT;
- unsigned NumBits = MVT::getSizeInBits(VT);
+ MVT VT = Op.getValueType();
+ MVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
Op = Op.getOperand(0);
if (VT == MVT::i8) {
@@ -5348,9 +5606,9 @@
}
SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- MVT::ValueType OpVT = VT;
- unsigned NumBits = MVT::getSizeInBits(VT);
+ MVT VT = Op.getValueType();
+ MVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
Op = Op.getOperand(0);
if (VT == MVT::i8) {
@@ -5375,11 +5633,13 @@
return Op;
}
-SDOperand X86TargetLowering::LowerLCS(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType T = cast<AtomicSDNode>(Op.Val)->getVT();
+SDOperand X86TargetLowering::LowerCMP_SWAP(SDOperand Op, SelectionDAG &DAG) {
+ MVT T = Op.getValueType();
unsigned Reg = 0;
unsigned size = 0;
- switch(T) {
+ switch(T.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
@@ -5387,7 +5647,7 @@
if (Subtarget->is64Bit()) {
Reg = X86::RAX; size = 8;
} else // Should go away when LowerType stuff lands
- return SDOperand(ExpandATOMIC_LCS(Op.Val, DAG), 0);
+ return SDOperand(ExpandATOMIC_CMP_SWAP(Op.Val, DAG), 0);
break;
};
SDOperand cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg,
@@ -5404,9 +5664,9 @@
return cpOut;
}
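
For reference, the LCMPXCHG_DAG node built here leans on the x86 cmpxchg contract: the expected value is pinned in AL/AX/EAX/RAX and the observed memory value always comes back out of the same register. A sketch of that contract (illustrative C++, not the emitted code):

    // Illustrative cmpxchg semantics (the real node is lock-prefixed).
    static bool cmpxchgSemantics(unsigned *Mem, unsigned &Accum, unsigned New) {
      if (*Mem == Accum) { // ZF = 1: swap succeeded
        *Mem = New;
        return true;
      }
      Accum = *Mem;        // ZF = 0: accumulator observes the current value
      return false;
    }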
-SDNode* X86TargetLowering::ExpandATOMIC_LCS(SDNode* Op, SelectionDAG &DAG) {
- MVT::ValueType T = cast<AtomicSDNode>(Op)->getVT();
- assert (T == MVT::i64 && "Only know how to expand i64 CAS");
+SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op, SelectionDAG &DAG) {
+ MVT T = Op->getValueType(0);
+ assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
SDOperand cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3),
DAG.getConstant(0, MVT::i32));
@@ -5436,8 +5696,19 @@
cpOutL.getValue(2));
SDOperand OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
SDOperand ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2);
- Tys = DAG.getVTList(MVT::i64, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, ResultVal, cpOutH.getValue(1)).Val;
+ SDOperand Vals[2] = { ResultVal, cpOutH.getValue(1) };
+ return DAG.getMergeValues(Vals, 2).Val;
+}
+
+SDNode* X86TargetLowering::ExpandATOMIC_LOAD_SUB(SDNode* Op, SelectionDAG &DAG) {
+ MVT T = Op->getValueType(0);
+ assert (T == MVT::i32 && "Only know how to expand i32 Atomic Load Sub");
+ SDOperand negOp = DAG.getNode(ISD::SUB, T,
+ DAG.getConstant(0, T), Op->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, Op->getOperand(0),
+ Op->getOperand(1), negOp,
+ cast<AtomicSDNode>(Op)->getSrcValue(),
+ cast<AtomicSDNode>(Op)->getAlignment()).Val;
}
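
This relies on the identity x - v == x + (0 - v), so only ATOMIC_LOAD_ADD needs native support. The same identity at the source level (a minimal sketch; unsigned wraparound keeps it well defined):

    #include <atomic>

    // fetch_sub(V) expressed as fetch_add of the two's-complement negation;
    // both return the pre-operation value.
    unsigned atomicSubViaAdd(std::atomic<unsigned> &A, unsigned V) {
      return A.fetch_add(0u - V);
    }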
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -5445,7 +5716,7 @@
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: assert(0 && "Should not custom lower this!");
- case ISD::ATOMIC_LCS: return LowerLCS(Op,DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
@@ -5471,6 +5742,7 @@
case ISD::RET: return LowerRET(Op, DAG);
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -5490,13 +5762,15 @@
}
}
-/// ExpandOperation - Provide custom lowering hooks for expanding operations.
-SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+SDNode *X86TargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) {
switch (N->getOpcode()) {
default: assert(0 && "Should not custom lower this!");
case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
- case ISD::ATOMIC_LCS: return ExpandATOMIC_LCS(N, DAG);
+ case ISD::ATOMIC_CMP_SWAP: return ExpandATOMIC_CMP_SWAP(N, DAG);
+ case ISD::ATOMIC_LOAD_SUB: return ExpandATOMIC_LOAD_SUB(N,DAG);
}
}
@@ -5546,8 +5820,12 @@
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
- case X86ISD::LCMPXCHG_DAG: return "x86ISD::LCMPXCHG_DAG";
- case X86ISD::LCMPXCHG8_DAG: return "x86ISD::LCMPXCHG8_DAG";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
}
}
@@ -5610,12 +5888,11 @@
return Subtarget->is64Bit() || NumBits1 < 64;
}
-bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
- MVT::ValueType VT2) const {
- if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2))
+bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
return false;
- unsigned NumBits1 = MVT::getSizeInBits(VT1);
- unsigned NumBits2 = MVT::getSizeInBits(VT2);
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
if (NumBits1 <= NumBits2)
return false;
return Subtarget->is64Bit() || NumBits1 < 64;
@@ -5626,9 +5903,9 @@
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
-X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
+X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT VT) const {
// Only do shuffles on 128-bit vector types for now.
- if (MVT::getSizeInBits(VT) == 64) return false;
+ if (VT.getSizeInBits() == 64) return false;
return (Mask.Val->getNumOperands() <= 4 ||
isIdentityMask(Mask.Val) ||
isIdentityMask(Mask.Val, true) ||
@@ -5642,11 +5919,10 @@
bool
X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps,
- MVT::ValueType EVT,
- SelectionDAG &DAG) const {
+ MVT EVT, SelectionDAG &DAG) const {
unsigned NumElts = BVOps.size();
// Only do shuffles on 128-bit vector types for now.
- if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
+ if (EVT.getSizeInBits() * NumElts == 64) return false;
if (NumElts == 2) return true;
if (NumElts == 4) {
return (isMOVLMask(&BVOps[0], 4) ||
@@ -5661,6 +5937,195 @@
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpc,
+ unsigned immOpc,
+ bool invSrc) {
+ // For the atomic bitwise operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [bitinstr.addr]
+ // op t2 = t1, [bitinstr.val]
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bnz newMBB
+ // fallthrough -->nextMBB
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
+ F->getBasicBlockList().insert(MBBIter, newMBB);
+ F->getBasicBlockList().insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(bInstr->getNumOperands() < 8 && "unexpected number of operands");
+ MachineOperand& destOper = bInstr->getOperand(0);
+ MachineOperand* argOpers[6];
+ int numArgs = bInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &bInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, scale, index, and displacement
+ int lastAddrIndx = 3; // [0,3]
+ int valArgIndx = 4;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ unsigned tt = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ if (invSrc)
+ MIB = BuildMI(newMBB, TII->get(X86::NOT32r), tt).addReg(t1);
+ else
+ tt = t1;
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
+ && "invalid operand");
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, TII->get(regOpc), t2);
+ else
+ MIB = BuildMI(newMBB, TII->get(immOpc), t2);
+ MIB.addReg(tt);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+ delete bInstr; // The pseudo instruction is gone now.
+ return nextMBB;
+}
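
The loop emitted above amounts to the familiar compare-and-swap retry loop; a minimal C++11 sketch with AND standing in for regOpc/immOpc (illustration only; the pseudo's destination, like the return value here, is the pre-operation value left in EAX):

    #include <atomic>

    unsigned atomicAndReturnOld(std::atomic<unsigned> &Addr, unsigned Val) {
      unsigned T1 = Addr.load();                     // ld t1 = [addr]
      unsigned T2;
      do {
        T2 = T1 & Val;                               // op t2 = t1, val
      } while (!Addr.compare_exchange_weak(T1, T2)); // lock cmpxchg; retry
      return T1;                                     // old value (EAX)
    }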
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+ MachineBasicBlock *MBB,
+ unsigned cmovOpc) {
+ // For the atomic min/max operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [min/max.addr]
+ // mov t2 = [min/max.val]
+ // cmp t1, t2
+ // cmov[cond] t2 = t1
+ // mov EAX = t1
+ // lcs dest = [min/max.addr], t2 [EAX is implicit]
+ // bnz newMBB
+ // fallthrough -->nextMBB
+ //
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
+ F->getBasicBlockList().insert(MBBIter, newMBB);
+ F->getBasicBlockList().insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
+ MachineOperand& destOper = mInstr->getOperand(0);
+ MachineOperand* argOpers[6];
+ int numArgs = mInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &mInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, scale, index, and displacement
+ int lastAddrIndx = 3; // [0,3]
+ int valArgIndx = 4;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ // We only support register and immediate values
+ assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
+ && "invalid operand");
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
+ else
+ // Immediate operands need the move-immediate form, not MOV32rr.
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32ri), t2);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
+ MIB.addReg(t1);
+ MIB.addReg(t2);
+
+ // Generate the cmov
+ unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MIB = BuildMI(newMBB, TII->get(cmovOpc),t3);
+ MIB.addReg(t2);
+ MIB.addReg(t1);
+
+ // Compare and exchange if no one has modified the memory location
+ MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t3);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+ delete mInstr; // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) {
@@ -5695,15 +6160,11 @@
MachineFunction *F = BB->getParent();
F->getBasicBlockList().insert(It, copy0MBB);
F->getBasicBlockList().insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
+ // Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- sinkMBB->addSuccessor(*i);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+ sinkMBB->transferSuccessors(BB);
+
+ // Add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
@@ -5803,6 +6264,26 @@
delete MI; // The pseudo instruction is gone now.
return BB;
}
+ case X86::ATOMAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri);
+ case X86::ATOMOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+ X86::OR32ri);
+ case X86::ATOMXOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+ X86::XOR32ri);
+ case X86::ATOMNAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, true);
+ case X86::ATOMMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+ case X86::ATOMMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+ case X86::ATOMUMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+ case X86::ATOMUMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
}
}
@@ -5834,140 +6315,130 @@
}
}
-/// getShuffleScalarElt - Returns the scalar element that will make up the ith
-/// element of the result of the vector shuffle.
-static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
- MVT::ValueType VT = N->getValueType(0);
- SDOperand PermMask = N->getOperand(2);
- unsigned NumElems = PermMask.getNumOperands();
- SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
- i %= NumElems;
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return (i == 0)
- ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
- } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
- SDOperand Idx = PermMask.getOperand(i);
- if (Idx.getOpcode() == ISD::UNDEF)
- return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
- return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
- }
- return SDOperand();
-}
-
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
-/// node is a GlobalAddress + an offset.
-static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
- unsigned Opc = N->getOpcode();
- if (Opc == X86ISD::Wrapper) {
- if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ GlobalValue* &GA, int64_t &Offset) const{
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
return true;
}
- } else if (Opc == ISD::ADD) {
- SDOperand N1 = N->getOperand(0);
- SDOperand N2 = N->getOperand(1);
- if (isGAPlusOffset(N1.Val, GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
- if (V) {
- Offset += V->getSignExtended();
- return true;
- }
- } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
- if (V) {
- Offset += V->getSignExtended();
- return true;
- }
- }
- }
- return false;
-}
-
-/// isConsecutiveLoad - Returns true if N is loading from an address of Base
-/// + Dist * Size.
-static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
- MachineFrameInfo *MFI) {
- if (N->getOperand(0).Val != Base->getOperand(0).Val)
- return false;
-
- SDOperand Loc = N->getOperand(1);
- SDOperand BaseLoc = Base->getOperand(1);
- if (Loc.getOpcode() == ISD::FrameIndex) {
- if (BaseLoc.getOpcode() != ISD::FrameIndex)
- return false;
- int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
- int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
- if (FS != BFS || FS != Size) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
- } else {
- GlobalValue *GV1 = NULL;
- GlobalValue *GV2 = NULL;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
- bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
- bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
- if (isGA1 && isGA2 && GV1 == GV2)
- return Offset1 == (Offset2 + Dist*Size);
}
-
- return false;
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
- const X86Subtarget *Subtarget) {
+static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
+ const TargetLowering &TLI) {
GlobalValue *GV;
int64_t Offset = 0;
- if (isGAPlusOffset(Base, GV, Offset))
- return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
+ if (TLI.isGAPlusOffset(Base, GV, Offset))
+ return (GV->getAlignment() >= N && (Offset % N) == 0);
// DAG combine handles the stack object case.
return false;
}
+static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
+ unsigned NumElems, MVT EVT,
+ SDNode *&Base,
+ SelectionDAG &DAG, MachineFrameInfo *MFI,
+ const TargetLowering &TLI) {
+ Base = NULL;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDOperand Idx = PermMask.getOperand(i);
+ if (Idx.getOpcode() == ISD::UNDEF) {
+ if (!Base)
+ return false;
+ continue;
+ }
+
+ SDOperand Elt = DAG.getShuffleScalarElt(N, i);
+ if (!Elt.Val ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
+ return false;
+ if (!Base) {
+ Base = Elt.Val;
+ if (Base->getOpcode() == ISD::UNDEF)
+ return false;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (!TLI.isConsecutiveLoad(Elt.Val, Base,
+ EVT.getSizeInBits()/8, i, MFI))
+ return false;
+ }
+ return true;
+}
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MVT::ValueType VT = N->getValueType(0);
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ const TargetLowering &TLI) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT.getVectorElementType();
SDOperand PermMask = N->getOperand(2);
- int NumElems = (int)PermMask.getNumOperands();
+ unsigned NumElems = PermMask.getNumOperands();
SDNode *Base = NULL;
- for (int i = 0; i < NumElems; ++i) {
- SDOperand Idx = PermMask.getOperand(i);
- if (Idx.getOpcode() == ISD::UNDEF) {
- if (!Base) return SDOperand();
- } else {
- SDOperand Arg =
- getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
- if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
- return SDOperand();
- if (!Base)
- Base = Arg.Val;
- else if (!isConsecutiveLoad(Arg.Val, Base,
- i, MVT::getSizeInBits(EVT)/8,MFI))
- return SDOperand();
- }
- }
+ if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
+ DAG, MFI, TLI))
+ return SDOperand();
- bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
LoadSDNode *LD = cast<LoadSDNode>(Base);
- if (isAlign16) {
+ if (isBaseAlignmentOfN(16, Base->getOperand(1).Val, TLI))
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
LD->getSrcValueOffset(), LD->isVolatile());
- } else {
- return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->isVolatile(),
- LD->getAlignment());
- }
+ return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->isVolatile(),
+ LD->getAlignment());
}
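
Concretely, the pattern this combine recognizes via EltsFromConsecutiveLoads is an element-by-element reassembly of one contiguous region; a hypothetical source that produces it:

    // Four consecutive scalar loads gathered by an identity shuffle collapse
    // into a single 16-byte load (or an unaligned load with the original
    // alignment, per the isBaseAlignmentOfN check above).
    typedef float v4f32 __attribute__((vector_size(16)));

    v4f32 gather4(const float *P) {
      v4f32 V = { P[0], P[1], P[2], P[3] };  // build_vector of adjacent loads
      return V;
    }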
+/// PerformBuildVectorCombine - build_vector (load i64 / f64), 0 -> movq / movsd.
+static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget,
+ const TargetLowering &TLI) {
+ unsigned NumOps = N->getNumOperands();
+
+ // Ignore single operand BUILD_VECTOR.
+ if (NumOps == 1)
+ return SDOperand();
+
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+ // We are looking for a load of i64 followed by a zero extend. We want to
+ // transform it before the legalizer has a chance to expand it. Also look
+ // for an i64 BUILD_PAIR bitcast to f64.
+ return SDOperand();
+ // This must be an insertion into a zero vector.
+ SDOperand HighElt = N->getOperand(1);
+ if (!isZeroNode(HighElt))
+ return SDOperand();
+
+ // Value must be a load.
+ SDNode *Base = N->getOperand(0).Val;
+ if (!isa<LoadSDNode>(Base)) {
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
+ return SDOperand();
+ Base = Base->getOperand(0).Val;
+ if (!isa<LoadSDNode>(Base))
+ return SDOperand();
+ }
+
+ // Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+ // Load must not be an extload.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDOperand();
+
+ return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
+}
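
The movq this enables loads 64 bits and zeroes the upper half of the XMM register in one instruction; a hypothetical source-level trigger on a 32-bit target:

    #include <emmintrin.h>

    // Building a v2i64 from a 64-bit load plus a zero high element is
    // exactly movq's semantics (_mm_loadl_epi64: load 64, zero upper 64).
    __m128i loadLow64(const long long *P) {
      return _mm_loadl_epi64((const __m128i *)P);
    }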
+
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -6041,14 +6512,15 @@
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
-static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
+static SDOperand PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
- if (MVT::isVector(St->getValue().getValueType()) &&
- MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->getValue().getValueType().isVector() &&
+ St->getValue().getValueType().getSizeInBits() == 64 &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
@@ -6093,7 +6565,7 @@
// Otherwise, lower to two 32-bit copies.
SDOperand LoAddr = Ld->getBasePtr();
SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
- DAG.getConstant(MVT::i32, 4));
+ DAG.getConstant(4, MVT::i32));
SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
Ld->getSrcValue(), Ld->getSrcValueOffset(),
@@ -6113,7 +6585,7 @@
LoAddr = St->getBasePtr();
HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
- DAG.getConstant(MVT::i32, 4));
+ DAG.getConstant(4, MVT::i32));
SDOperand LoSt = DAG.getStore(NewChain, LoLd, LoAddr,
St->getSrcValue(), St->getSrcValueOffset(),
@@ -6162,10 +6634,11 @@
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::BUILD_VECTOR:
+ return PerformBuildVectorCombine(N, DAG, Subtarget, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
- case ISD::STORE:
- return PerformSTORECombine(cast<StoreSDNode>(N), DAG, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
@@ -6205,17 +6678,18 @@
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
-void X86TargetLowering::lowerXConstraint(MVT::ValueType ConstraintVT,
- std::string& s) const {
- if (MVT::isFloatingPoint(ConstraintVT)) {
+const char *X86TargetLowering::
+LowerXConstraint(MVT ConstraintVT) const {
+ // FP X constraints get lowered to SSE1/2 registers if available, otherwise
+ // 'f' like normal targets.
+ if (ConstraintVT.isFloatingPoint()) {
if (Subtarget->hasSSE2())
- s = "Y";
- else if (Subtarget->hasSSE1())
- s = "x";
- else
- s = "f";
- } else
- return TargetLowering::lowerXConstraint(ConstraintVT, s);
+ return "Y";
+ if (Subtarget->hasSSE1())
+ return "x";
+ }
+
+ return TargetLowering::LowerXConstraint(ConstraintVT);
}
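
For reference, a hypothetical user-level example of what LowerXConstraint
affects (the snippet is illustrative, not from the patch): with SSE2, an FP
operand under the catch-all "X" constraint is re-lowered to "Y" (an SSE
register), with only SSE1 to "x", and otherwise falls back to the default
handling, i.e. "f" on the x87 stack.

  // Hedged sketch using the GCC inline-asm "X" constraint.
  double touch(double v) {
    __asm__("" : "+X"(v)); // backend picks Y / x / f per the code above
    return v;
  }
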
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -6223,7 +6697,7 @@
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
char Constraint,
std::vector<SDOperand>&Ops,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) const {
SDOperand Result(0, 0);
switch (Constraint) {
@@ -6301,7 +6775,7 @@
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT::ValueType VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// FIXME: not handling fp-stack yet!
switch (Constraint[0]) { // GCC X86 Constraint Letters
@@ -6329,7 +6803,7 @@
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT::ValueType VT) const {
+ MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
@@ -6365,8 +6839,8 @@
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed
if (!Subtarget->hasSSE1()) break;
-
- switch (VT) {
+
+ switch (VT.getSimpleVT()) {
default: break;
// Scalar SSE types.
case MVT::f32:
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86ISelLowering.h Sun Jul 6 15:45:41 2008
@@ -181,10 +181,10 @@
/// in order to obtain suitable precision.
FRSQRT, FRCP,
- // Thread Local Storage
+ // TLSADDR, THREAD_POINTER - Thread Local Storage.
TLSADDR, THREAD_POINTER,
- // Exception Handling helpers
+ // EH_RETURN - Exception Handling helpers.
EH_RETURN,
/// TC_RETURN - Tail call return.
@@ -194,12 +194,21 @@
/// operand #3 optional in flag
TC_RETURN,
- // compare and swap
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
LCMPXCHG_DAG,
LCMPXCHG8_DAG,
- // Store FP control world into i16 memory
- FNSTCW16m
+ // FNSTCW16m - Store FP control word into i16 memory.
+ FNSTCW16m,
+
+ // VZEXT_MOVL - Vector move low and zero extend.
+ VZEXT_MOVL,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // VSHL, VSRL - Vector logical left / right shift.
+ VSHL, VSRL
};
}
@@ -305,7 +314,7 @@
int BytesCallerReserves; // Number of arg bytes caller makes.
public:
- explicit X86TargetLowering(TargetMachine &TM);
+ explicit X86TargetLowering(X86TargetMachine &TM);
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
@@ -330,15 +339,23 @@
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+ /// determining it.
+ virtual
+ MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
- /// ExpandOperation - Custom lower the specified operation, splitting the
- /// value into two pieces.
+ /// ReplaceNodeResults - Replace a node with an illegal result type
+ /// with a new node built out of custom code.
///
- virtual SDNode *ExpandOperationResult(SDNode *N, SelectionDAG &DAG);
+ virtual SDNode *ReplaceNodeResults(SDNode *N, SelectionDAG &DAG);
virtual SDOperand PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -346,12 +363,13 @@
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB);
+
/// getTargetNodeName - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT::ValueType getSetCCResultType(const SDOperand &) const;
+ virtual MVT getSetCCResultType(const SDOperand &) const;
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
@@ -362,6 +380,9 @@
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+
+ virtual bool
+ isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
SDOperand getReturnAddressFrameIndex(SelectionDAG &DAG);
@@ -369,17 +390,16 @@
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT::ValueType VT) const;
+ MVT VT) const;
- virtual void lowerXConstraint(MVT::ValueType ConstraintVT,
- std::string&) const;
+ virtual const char *LowerXConstraint(MVT ConstraintVT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
virtual void LowerAsmOperandForConstraint(SDOperand Op,
char ConstraintLetter,
std::vector<SDOperand> &Ops,
- SelectionDAG &DAG);
+ SelectionDAG &DAG) const;
/// getRegForInlineAsmConstraint - Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
@@ -387,7 +407,7 @@
/// error, this returns a register number of 0.
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT::ValueType VT) const;
+ MVT VT) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
@@ -397,26 +417,25 @@
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
/// register EAX to i16 by referencing its sub-register AX.
virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
- virtual bool isTruncateFree(MVT::ValueType VT1, MVT::ValueType VT2) const;
+ virtual bool isTruncateFree(MVT VT1, MVT VT2) const;
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
/// values are assumed to be legal.
- virtual bool isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const;
+ virtual bool isShuffleMaskLegal(SDOperand Mask, MVT VT) const;
/// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
/// used by targets to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry.
virtual bool isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps,
- MVT::ValueType EVT,
- SelectionDAG &DAG) const;
+ MVT EVT, SelectionDAG &DAG) const;
/// ShouldShrinkFPConstant - If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and / or reduce runtime.
- virtual bool ShouldShrinkFPConstant(MVT::ValueType VT) const {
+ virtual bool ShouldShrinkFPConstant(MVT VT) const {
// Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
// expensive than a straight movsd. On the other hand, it's important to
// shrink long double fp constant since fldt is very slow.
@@ -436,7 +455,7 @@
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
- bool isScalarFPTypeInSSEReg(MVT::ValueType VT) const {
+ bool isScalarFPTypeInSSEReg(MVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
@@ -445,7 +464,7 @@
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- const TargetRegisterInfo *RegInfo;
+ const X86RegisterInfo *RegInfo;
/// X86StackPtr - X86 physical register used as stack ptr.
unsigned X86StackPtr;
@@ -476,15 +495,6 @@
SDOperand EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDOperand &OutRetAddr,
SDOperand Chain, bool IsTailCall, bool Is64Bit,
int FPDiff);
-
- bool CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
- SmallVector< std::pair<unsigned, unsigned>,8> &TailCallByValClobberedVRegs,
- SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
- std::pair<unsigned, SDOperand> &RegToPass,
- SDOperand &OutChain,
- SDOperand &OutFlag,
- MachineFunction &MF,
- SelectionDAG & DAG);
CCAssignFn *CCAssignFnForNode(SDOperand Op) const;
NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDOperand Op);
@@ -520,6 +530,7 @@
SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerVAARG(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerVACOPY(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG);
@@ -530,23 +541,41 @@
SDOperand LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerCTLZ(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerCTTZ(SDOperand Op, SelectionDAG &DAG);
- SDOperand LowerLCS(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerCMP_SWAP(SDOperand Op, SelectionDAG &DAG);
SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG);
SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG);
- SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG);
-
+ SDNode *ExpandATOMIC_CMP_SWAP(SDNode *N, SelectionDAG &DAG);
+ SDNode *ExpandATOMIC_LOAD_SUB(SDNode *N, SelectionDAG &DAG);
+
SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG,
SDOperand Chain,
SDOperand Dst, SDOperand Src,
SDOperand Size, unsigned Align,
- const Value *DstSV, uint64_t DstOff);
+ const Value *DstSV, uint64_t DstSVOff);
SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
SDOperand Chain,
SDOperand Dst, SDOperand Src,
SDOperand Size, unsigned Align,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstOff,
- const Value *SrcSV, uint64_t SrcOff);
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff);
+
+ /// Utility function to emit atomic bitwise operations (and, or, xor).
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
+ MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpc,
+ unsigned immOpc,
+ bool invSrc = false);
+
+ /// Utility function to emit atomic min and max. It takes the min/max
+ /// instruction to expand, the associated basic block, and the associated
+ /// cmov opcode for moving the min or max value.
+ MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned cmovOpc);
};
}
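
The two custom-inserter hooks declared above expand the new atomic pseudo
instructions into a load / operate / LCMPXCHG loop over machine basic
blocks. A hedged C++ rendering of the loop shape for the bitwise case
(std::atomic and the function name are illustrative only, not the emitted
code):

  #include <atomic>

  // Atomic fetch-and-AND via a compare-and-swap retry loop; returns the
  // old value, matching the "$val = $dst" pseudo's result.
  int fetch_and_and(std::atomic<int> &mem, int val) {
    int old = mem.load();
    while (!mem.compare_exchange_weak(old, old & val))
      ; // cmpxchg failed: 'old' now holds the fresh value, retry
    return old;
  }

The min/max variant replaces the bitwise op with a compare plus the cmov
opcode passed in cmovOpc.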
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86Instr64bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86Instr64bit.td?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86Instr64bit.td (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86Instr64bit.td Sun Jul 6 15:45:41 2008
@@ -101,7 +101,7 @@
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR64:$dst)]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call\t{*}$dst", []>;
+ "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
}
@@ -199,7 +199,7 @@
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
"movabs{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, imm:$src)]>;
@@ -751,7 +751,7 @@
[(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
let isTwoAddress = 1 in {
-let isCommutable = 1 in
+let isCommutable = 1, isAsCheapAsAMove = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
@@ -1091,7 +1091,8 @@
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
// when we have a better way to specify isel priority.
-let Defs = [EFLAGS], AddedComplexity = 1, isReMaterializable = 1 in
+let Defs = [EFLAGS], AddedComplexity = 1,
+ isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : RI<0x31, MRMInitReg, (outs GR64:$dst), (ins),
"xor{l}\t${dst:subreg32}, ${dst:subreg32}",
[(set GR64:$dst, 0)]>;
@@ -1102,6 +1103,13 @@
"mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
[(set GR64:$dst, i64immZExt32:$src)]>;
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//===----------------------------------------------------------------------===//
+
+def TLS_addr64 : I<0, Pseudo, (outs GR64:$dst), (ins i64imm:$sym),
+ ".byte\t0x66; leaq\t${sym:mem}(%rip), $dst; .word\t0x6666; rex64",
+ [(set GR64:$dst, (X86tlsaddr tglobaltlsaddr:$sym))]>;
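
The odd 0x66 / 0x6666 / rex64 padding matches the fixed-length code sequence
the x86-64 ABI prescribes for general-dynamic TLS, so the linker can later
relax the access in place. A hypothetical source-level trigger (using the
GCC __thread extension; illustrative only):

  __thread int per_thread_counter;

  // In PIC code this address computation lowers to X86tlsaddr and a call
  // to __tls_get_addr, i.e. the TLS_addr64 pseudo above.
  int *counter_addr() { return &per_thread_counter; }
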
//===----------------------------------------------------------------------===//
// Atomic Instructions
@@ -1116,7 +1124,7 @@
let Constraints = "$val = $dst", Defs = [EFLAGS] in {
def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"lock xadd $val, $ptr",
- [(set GR64:$dst, (atomic_las_64 addr:$ptr, GR64:$val))]>,
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
TB, LOCK;
def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"xchg $val, $ptr",
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86InstrBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86InstrBuilder.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86InstrBuilder.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86InstrBuilder.h Sun Jul 6 15:45:41 2008
@@ -70,15 +70,19 @@
/// displacement. An example is: DWORD PTR [EAX + 4].
///
inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, int Offset) {
- return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset);
+ unsigned Reg, bool isKill,
+ int Offset) {
+ return MIB.addReg(Reg, false, false, isKill)
+ .addImm(1).addReg(0).addImm(Offset);
}
/// addRegReg - This function is used to add a memory reference of the form:
/// [Reg + Reg].
inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
- unsigned Reg1, unsigned Reg2) {
- return MIB.addReg(Reg1).addImm(1).addReg(Reg2).addImm(0);
+ unsigned Reg1, bool isKill1,
+ unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, false, false, isKill1).addImm(1)
+ .addReg(Reg2, false, false, isKill2).addImm(0);
}
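
A hedged usage sketch of the new signatures (MBB, I and TII stand for an
insertion point and the target's TargetInstrInfo; nothing in this snippet is
part of the patch itself):

  // Load [ESI + 8] into EAX, marking ESI as killed by this use.
  addRegOffset(BuildMI(MBB, I, TII.get(X86::MOV32rm), X86::EAX),
               X86::ESI, /*isKill=*/true, /*Offset=*/8);
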
inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.cpp Sun Jul 6 15:45:41 2008
@@ -569,8 +569,12 @@
{ X86::PMAXUBrr, X86::PMAXUBrm },
{ X86::PMINSWrr, X86::PMINSWrm },
{ X86::PMINUBrr, X86::PMINUBrm },
+ { X86::PMULDQrr, X86::PMULDQrm },
+ { X86::PMULDQrr_int, X86::PMULDQrm_int },
{ X86::PMULHUWrr, X86::PMULHUWrm },
{ X86::PMULHWrr, X86::PMULHWrm },
+ { X86::PMULLDrr, X86::PMULLDrm },
+ { X86::PMULLDrr_int, X86::PMULLDrm_int },
{ X86::PMULLWrr, X86::PMULLWrm },
{ X86::PMULUDQrr, X86::PMULUDQrm },
{ X86::PORrr, X86::PORrm },
@@ -760,7 +764,8 @@
return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
}
-bool X86InstrInfo::isReallyTriviallyReMaterializable(MachineInstr *MI) const {
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
default: break;
case X86::MOV8rm:
@@ -827,6 +832,40 @@
return true;
}
+/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
+/// that would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// two instructions, it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ // For compile-time reasons, if we are not able to determine the
+ // safety after visiting 2 instructions, we will assume it's not safe.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (I == MBB.end())
+ // Reached end of block, it's safe.
+ return true;
+ bool SeenDef = false;
+ for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isRegister())
+ continue;
+ if (MO.getReg() == X86::EFLAGS) {
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ }
+
+ if (SeenDef)
+ // This instruction defines EFLAGS, no need to look any further.
+ return true;
+ ++I;
+ }
+
+ // Conservative answer.
+ return false;
+}
+
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
@@ -841,25 +880,33 @@
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// Re-materialize them as movri instructions to avoid side effects.
+ bool Emitted = false;
switch (Orig->getOpcode()) {
+ default: break;
case X86::MOV8r0:
- BuildMI(MBB, I, get(X86::MOV8ri), DestReg).addImm(0);
- break;
case X86::MOV16r0:
- BuildMI(MBB, I, get(X86::MOV16ri), DestReg).addImm(0);
- break;
case X86::MOV32r0:
- BuildMI(MBB, I, get(X86::MOV32ri), DestReg).addImm(0);
- break;
- case X86::MOV64r0:
- BuildMI(MBB, I, get(X86::MOV64ri32), DestReg).addImm(0);
+ case X86::MOV64r0: {
+ if (!isSafeToClobberEFLAGS(MBB, I)) {
+ unsigned Opc = 0;
+ switch (Orig->getOpcode()) {
+ default: break;
+ case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
+ case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri32; break;
+ }
+ BuildMI(MBB, I, get(Opc), DestReg).addImm(0);
+ Emitted = true;
+ }
break;
- default: {
+ }
+ }
+
+ if (!Emitted) {
MachineInstr *MI = Orig->clone();
MI->getOperand(0).setReg(DestReg);
MBB.insert(I, MI);
- break;
- }
}
if (ChangeSubIdx) {
@@ -931,11 +978,13 @@
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
- LiveVariables &LV) const {
+ LiveVariables *LV) const {
MachineInstr *MI = MBBI;
// All input instructions are two-addr instructions. Get the known operands.
unsigned Dest = MI->getOperand(0).getReg();
unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
@@ -948,51 +997,47 @@
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
- unsigned A = MI->getOperand(0).getReg();
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
- unsigned M = MI->getOperand(3).getImm();
if (B != C) return 0;
- NewMI = BuildMI(get(X86::PSHUFDri), A).addReg(B).addImm(M);
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+ NewMI = BuildMI(get(X86::PSHUFDri)).addReg(A, true, false, false, isDead)
+ .addReg(B, false, false, isKill).addImm(M);
break;
}
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
// the flags produced by a shift yet, so this is safe.
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
-
- NewMI = BuildMI(get(X86::LEA64r), Dest)
- .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+
+ NewMI = BuildMI(get(X86::LEA64r)).addReg(Dest, true, false, false, isDead)
+ .addReg(0).addImm(1 << ShAmt).addReg(Src, false, false, isKill).addImm(0);
break;
}
case X86::SHL32ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
// the flags produced by a shift yet, so this is safe.
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
-
+
unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
X86::LEA64_32r : X86::LEA32r;
- NewMI = BuildMI(get(Opc), Dest)
- .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+ NewMI = BuildMI(get(Opc)).addReg(Dest, true, false, false, isDead)
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, false, false, isKill).addImm(0);
break;
}
case X86::SHL16ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
// the flags produced by a shift yet, so this is safe.
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
-
+
if (DisableLEA16) {
// If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
@@ -1003,31 +1048,36 @@
// Build and insert into an implicit UNDEF value. This is OK because
// we'll be shifting and then extracting the lower 16-bits.
- MachineInstr *Undef = BuildMI(get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *Undef = BuildMI(get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI = BuildMI(get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg).addReg(Src, false, false, isKill)
+ .addImm(X86::SUBREG_16BIT);
- MachineInstr *Ins =
- BuildMI(get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg).addReg(Src).addImm(X86::SUBREG_16BIT);
+ NewMI = BuildMI(get(Opc), leaOutReg).addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, false, false, true).addImm(0);
- NewMI = BuildMI(get(Opc), leaOutReg)
- .addReg(0).addImm(1 << ShAmt).addReg(leaInReg).addImm(0);
-
- MachineInstr *Ext =
- BuildMI(get(X86::EXTRACT_SUBREG), Dest)
- .addReg(leaOutReg).addImm(X86::SUBREG_16BIT);
- Ext->copyKillDeadInfo(MI);
+ MachineInstr *ExtMI = BuildMI(get(X86::EXTRACT_SUBREG))
+ .addReg(Dest, true, false, false, isDead)
+ .addReg(leaOutReg, false, false, true).addImm(X86::SUBREG_16BIT);
MFI->insert(MBBI, Undef);
- MFI->insert(MBBI, Ins); // Insert the insert_subreg
- LV.instructionChanged(MI, NewMI); // Update live variables
- LV.addVirtualRegisterKilled(leaInReg, NewMI);
- MFI->insert(MBBI, NewMI); // Insert the new inst
- LV.addVirtualRegisterKilled(leaOutReg, Ext);
- MFI->insert(MBBI, Ext); // Insert the extract_subreg
- return Ext;
+ MFI->insert(MBBI, InsMI); // Insert the insert_subreg
+ MFI->insert(MBBI, NewMI); // Insert the lea inst
+ MFI->insert(MBBI, ExtMI); // Insert the extract_subreg
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+ return ExtMI;
} else {
- NewMI = BuildMI(get(X86::LEA16r), Dest)
- .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+ NewMI = BuildMI(get(X86::LEA16r)).addReg(Dest, true, false, false, isDead)
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, false, false, isKill).addImm(0);
}
break;
}
@@ -1046,58 +1096,79 @@
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addRegOffset(BuildMI(get(Opc), Dest), Src, 1);
+ NewMI = addRegOffset(BuildMI(get(Opc))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, 1);
break;
}
case X86::INC16r:
case X86::INC64_16r:
if (DisableLEA16) return 0;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addRegOffset(BuildMI(get(X86::LEA16r), Dest), Src, 1);
+ NewMI = addRegOffset(BuildMI(get(X86::LEA16r))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, 1);
break;
case X86::DEC64r:
case X86::DEC32r: {
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addRegOffset(BuildMI(get(Opc), Dest), Src, -1);
+ NewMI = addRegOffset(BuildMI(get(Opc))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, -1);
break;
}
case X86::DEC16r:
case X86::DEC64_16r:
if (DisableLEA16) return 0;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addRegOffset(BuildMI(get(X86::LEA16r), Dest), Src, -1);
+ NewMI = addRegOffset(BuildMI(get(X86::LEA16r))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, -1);
break;
case X86::ADD64rr:
case X86::ADD32rr: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addRegReg(BuildMI(get(Opc), Dest), Src,
- MI->getOperand(2).getReg());
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(get(Opc))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
break;
}
- case X86::ADD16rr:
+ case X86::ADD16rr: {
if (DisableLEA16) return 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addRegReg(BuildMI(get(X86::LEA16r), Dest), Src,
- MI->getOperand(2).getReg());
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(get(X86::LEA16r))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
break;
+ }
case X86::ADD64ri32:
case X86::ADD64ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
if (MI->getOperand(2).isImmediate())
- NewMI = addRegOffset(BuildMI(get(X86::LEA64r), Dest), Src,
- MI->getOperand(2).getImm());
+ NewMI = addRegOffset(BuildMI(get(X86::LEA64r))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
case X86::ADD32ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
if (MI->getOperand(2).isImmediate()) {
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addRegOffset(BuildMI(get(Opc), Dest), Src,
- MI->getOperand(2).getImm());
+ NewMI = addRegOffset(BuildMI(get(Opc))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, MI->getOperand(2).getImm());
}
break;
case X86::ADD16ri:
@@ -1105,8 +1176,9 @@
if (DisableLEA16) return 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
if (MI->getOperand(2).isImmediate())
- NewMI = addRegOffset(BuildMI(get(X86::LEA16r), Dest), Src,
- MI->getOperand(2).getImm());
+ NewMI = addRegOffset(BuildMI(get(X86::LEA16r))
+ .addReg(Dest, true, false, false, isDead),
+ Src, isKill, MI->getOperand(2).getImm());
break;
case X86::SHL16ri:
if (DisableLEA16) return 0;
@@ -1122,7 +1194,10 @@
unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
: (MIOpc == X86::SHL32ri
? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
- NewMI = addFullAddress(BuildMI(get(Opc), Dest), AM);
+ NewMI = addFullAddress(BuildMI(get(Opc))
+ .addReg(Dest, true, false, false, isDead), AM);
+ if (isKill)
+ NewMI->getOperand(3).setIsKill(true);
}
break;
}
@@ -1132,8 +1207,13 @@
if (!NewMI) return 0;
- NewMI->copyKillDeadInfo(MI);
- LV.instructionChanged(MI, NewMI); // Update live variables
+ if (LV) { // Update live variables
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, NewMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, NewMI);
+ }
+
MFI->insert(MBBI, NewMI); // Insert the new inst
return NewMI;
}
@@ -1141,7 +1221,8 @@
/// commuteInstruction - We have a few instructions that must be hacked on to
/// commute them.
///
-MachineInstr *X86InstrInfo::commuteInstruction(MachineInstr *MI) const {
+MachineInstr *
+X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
switch (MI->getOpcode()) {
case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
@@ -1164,6 +1245,7 @@
unsigned A = MI->getOperand(0).getReg();
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
+ bool AisDead = MI->getOperand(0).isDead();
bool BisKill = MI->getOperand(1).isKill();
bool CisKill = MI->getOperand(2).isKill();
// If machine instrs are no longer in two-address forms, update
@@ -1175,7 +1257,8 @@
A = C;
CisKill = false;
}
- return BuildMI(get(Opc), A).addReg(C, false, false, CisKill)
+ return BuildMI(get(Opc)).addReg(A, true, false, false, AisDead)
+ .addReg(C, false, false, CisKill)
.addReg(B, false, false, BisKill).addImm(Size-Amt);
}
case X86::CMOVB16rr:
@@ -1271,7 +1354,7 @@
// Fallthrough intended.
}
default:
- return TargetInstrInfoImpl::commuteInstruction(MI);
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
}
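
The SHRD/SHLD cases above rely on the identity shld(b, c, i) ==
shrd(c, b, N - i). A self-contained sanity check of the 16-bit case
(illustrative, not in the patch):

  #include <cassert>
  #include <cstdint>

  static uint16_t shld16(uint16_t hi, uint16_t lo, unsigned amt) {
    return uint16_t((hi << amt) | (lo >> (16 - amt))); // 0 < amt < 16
  }
  static uint16_t shrd16(uint16_t lo, uint16_t hi, unsigned amt) {
    return uint16_t((lo >> amt) | (hi << (16 - amt)));
  }

  int main() {
    for (unsigned i = 1; i < 16; ++i)
      assert(shld16(0xBEEF, 0x1234, i) == shrd16(0x1234, 0xBEEF, 16 - i));
    return 0;
  }
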
@@ -1455,7 +1538,7 @@
MachineOperand &MO) {
if (MO.isRegister())
MIB = MIB.addReg(MO.getReg(), MO.isDef(), MO.isImplicit(),
- false, false, MO.getSubReg());
+ MO.isKill(), MO.isDead(), MO.getSubReg());
else if (MO.isImmediate())
MIB = MIB.addImm(MO.getImm());
else if (MO.isFrameIndex())
@@ -1717,8 +1800,8 @@
}
void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
unsigned Opc = getLoadRegOpcode(RC, RI.getStackAlignment());
MachineInstrBuilder MIB = BuildMI(get(Opc), DestReg);
@@ -1854,10 +1937,8 @@
NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
- if (NewMI) {
- NewMI->copyKillDeadInfo(MI);
+ if (NewMI)
return NewMI;
- }
OpcodeTablePtr = &RegOp2MemOpTable0;
} else if (i == 1) {
@@ -1876,7 +1957,6 @@
NewMI = FuseTwoAddrInst(I->second, MOs, MI, *this);
else
NewMI = FuseInst(I->second, i, MOs, MI, *this);
- NewMI->copyKillDeadInfo(MI);
return NewMI;
}
}
@@ -2188,14 +2268,14 @@
// Emit the load instruction.
SDNode *Load = 0;
if (FoldedLoad) {
- MVT::ValueType VT = *RC->vt_begin();
+ MVT VT = *RC->vt_begin();
Load = DAG.getTargetNode(getLoadRegOpcode(RC, RI.getStackAlignment()), VT,
MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
}
// Emit the data processing instruction.
- std::vector<MVT::ValueType> VTs;
+ std::vector<MVT> VTs;
const TargetRegisterClass *DstRC = 0;
if (TID.getNumDefs() > 0) {
const TargetOperandInfo &DstTOI = TID.OpInfo[0];
@@ -2204,7 +2284,7 @@
VTs.push_back(*DstRC->vt_begin());
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- MVT::ValueType VT = N->getValueType(i);
+ MVT VT = N->getValueType(i);
if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
VTs.push_back(VT);
}
@@ -2633,7 +2713,8 @@
FinalSize += AI->getInlineAsmLength(AsmStr);
break;
}
- case TargetInstrInfo::LABEL:
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::DECLARE:
@@ -2819,7 +2900,7 @@
unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const TargetInstrDesc &Desc = MI->getDesc();
bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
- bool Is64BitMode = ((X86Subtarget*)TM.getSubtargetImpl())->is64Bit();
+ bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
if (Desc.getOpcode() == X86::MOVPC32r) {
Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.h Sun Jul 6 15:45:41 2008
@@ -227,6 +227,23 @@
};
}
+inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImmediate() &&
+ (MO.getImm() == 1 || MO.getImm() == 2 ||
+ MO.getImm() == 4 || MO.getImm() == 8);
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFrameIndex()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isRegister() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isRegister() &&
+ (MI->getOperand(Op+3).isImmediate() ||
+ MI->getOperand(Op+3).isGlobalAddress() ||
+ MI->getOperand(Op+3).isConstantPoolIndex() ||
+ MI->getOperand(Op+3).isJumpTableIndex());
+}
+
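
These helpers encode the backend's convention that an x86 memory reference
occupies four consecutive operands: base register, scale, index register,
displacement. A hedged sketch of pulling one apart (the function and names
are illustrative):

  // Assumes isMem(MI, Op) already returned true.
  void dissectMemOperand(const MachineInstr *MI, unsigned Op) {
    unsigned BaseReg  = MI->getOperand(Op).getReg();
    int64_t  Scale    = MI->getOperand(Op+1).getImm(); // 1, 2, 4 or 8
    unsigned IndexReg = MI->getOperand(Op+2).getReg();
    // Operand Op+3 is the displacement: an immediate or a
    // global / constant-pool / jump-table reference.
  }
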
class X86InstrInfo : public TargetInstrInfoImpl {
X86TargetMachine &TM;
const X86RegisterInfo RI;
@@ -250,7 +267,7 @@
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+ virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
// Return true if the instruction is a register to register move and
// leave the source and dest operands in the passed parameters.
@@ -260,7 +277,7 @@
unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
- bool isReallyTriviallyReMaterializable(MachineInstr *MI) const;
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, const MachineInstr *Orig) const;
@@ -278,12 +295,12 @@
///
virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
- LiveVariables &LV) const;
+ LiveVariables *LV) const;
/// commuteInstruction - We have a few instructions that must be hacked on to
/// commute them.
///
- virtual MachineInstr *commuteInstruction(MachineInstr *MI) const;
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
// Branch analysis.
virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.td?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86InstrInfo.td Sun Jul 6 15:45:41 2008
@@ -45,7 +45,7 @@
def SDT_X86CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
-def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
@@ -109,7 +109,7 @@
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def X86TLStp : SDNode<"X86ISD::THREAD_POINTER", SDT_X86TLSTP, []>;
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
@@ -229,9 +229,35 @@
}]>;
// Helper fragments for loads.
+// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (ld node:$ptr)), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ }
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (ld node:$ptr)), [{
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ }
+ return false;
+}]>;
+
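
The alignment guards are what make this widening sound. A hedged C++
illustration of the i16-to-i32 case (the cast ignores strict aliasing and is
only meant to mirror the hardware-level access):

  #include <cstdint>

  // If p is 4-byte aligned and the access is not volatile, loading the
  // containing 32-bit word cannot fault on any page the i16 itself touches;
  // an anyext load only guarantees its low 16 bits anyway (x86 is
  // little-endian, so those sit at the low address).
  uint16_t low_half(const uint16_t *p) { // assume p is 4-byte aligned
    uint32_t word = *reinterpret_cast<const uint32_t *>(p);
    return static_cast<uint16_t>(word);
  }
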
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
-def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
-def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
@@ -298,7 +324,7 @@
hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
- [/*(X86retflag 0)*/ /*FIXME: Disabled: rdar://5791600*/]>;
+ [(X86retflag 0)]>;
def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
[(X86retflag imm:$amt)]>;
@@ -371,7 +397,7 @@
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR32:$dst)]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
- "call\t{*}$dst", []>;
+ "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
}
// Tail call stuff.
@@ -391,6 +417,7 @@
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+
def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -551,7 +578,7 @@
def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, imm:$src)]>;
@@ -1282,23 +1309,23 @@
def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
"or{l}\t{$src, $dst|$dst, $src}",
[(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
-}
+} // isTwoAddress = 0
-let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
-def XOR8rr : I<0x30, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, GR8:$src2))]>;
-def XOR16rr : I<0x31, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, GR16:$src2))]>, OpSize;
-def XOR32rr : I<0x31, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
-}
+let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
+ def XOR8rr : I<0x30, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, GR8:$src2))]>;
+ def XOR16rr : I<0x31, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, GR16:$src2))]>, OpSize;
+ def XOR32rr : I<0x31, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
+} // isCommutable = 1
def XOR8rm : I<0x32, MRMSrcMem ,
(outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
@@ -1307,7 +1334,8 @@
def XOR16rm : I<0x33, MRMSrcMem ,
(outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2)))]>, OpSize;
+ [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2)))]>,
+ OpSize;
def XOR32rm : I<0x33, MRMSrcMem ,
(outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
@@ -1334,6 +1362,7 @@
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2))]>;
+
let isTwoAddress = 0 in {
def XOR8mr : I<0x30, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
@@ -1370,7 +1399,7 @@
(outs), (ins i32mem:$dst, i32i8imm :$src),
"xor{l}\t{$src, $dst|$dst, $src}",
[(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
-}
+} // isTwoAddress = 0
} // Defs = [EFLAGS]
// Shift instructions
@@ -1385,7 +1414,7 @@
def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
"shl{l}\t{%cl, $dst|$dst, %CL}",
[(set GR32:$dst, (shl GR32:$src, CL))]>;
-}
+} // Uses = [CL]
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
@@ -1399,7 +1428,7 @@
[(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
// cheaper.
-}
+} // isConvertibleToThreeAddress = 1
let isTwoAddress = 0 in {
let Uses = [CL] in {
@@ -2455,7 +2484,7 @@
// Alias instructions that map movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-let Defs = [EFLAGS], isReMaterializable = 1 in {
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
@@ -2499,12 +2528,12 @@
//
let Uses = [EBX] in
-def TLS_addr : I<0, Pseudo, (outs GR32:$dst), (ins i32imm:$sym),
- "leal\t${sym:mem}(,%ebx,1), $dst",
- [(set GR32:$dst, (X86tlsaddr tglobaltlsaddr:$sym))]>;
+def TLS_addr32 : I<0, Pseudo, (outs GR32:$dst), (ins i32imm:$sym),
+ "leal\t${sym:mem}(,%ebx,1), $dst",
+ [(set GR32:$dst, (X86tlsaddr tglobaltlsaddr:$sym))]>;
let AddedComplexity = 10 in
-def TLS_gs_rr : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
+def TLS_gs_rr : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
"movl\t%gs:($src), $dst",
[(set GR32:$dst, (load (add X86TLStp, GR32:$src)))]>;
@@ -2585,18 +2614,48 @@
let Constraints = "$val = $dst", Defs = [EFLAGS] in {
def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
"lock xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_las_32 addr:$ptr, GR32:$val))]>,
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
TB, LOCK;
def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
"lock xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_las_16 addr:$ptr, GR16:$val))]>,
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
TB, OpSize, LOCK;
def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
"lock xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_las_8 addr:$ptr, GR8:$val))]>,
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
TB, LOCK;
}
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand addr:$ptr, GR32:$val))]>;
+
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax addr:$ptr, GR32:$val))]>;
+}
+
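
At the source level these pseudos carry the semantics of the GCC __sync
builtins; a hedged example for ATOMAND32 (the function is made up, the
builtin is GCC's documented __sync_fetch_and_and):

  // Atomically clears bits in *flags and returns the previous value,
  // i.e. atomic_load_and on an i32 memory location.
  int clear_bits(volatile int *flags, int mask) {
    return __sync_fetch_and_and(flags, mask);
  }
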
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
@@ -2758,13 +2817,13 @@
include "X86Instr64bit.td"
//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+// XMM Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
-include "X86InstrMMX.td"
+include "X86InstrSSE.td"
//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
+// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
//===----------------------------------------------------------------------===//
-include "X86InstrSSE.td"
+include "X86InstrMMX.td"
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86InstrMMX.td?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86InstrMMX.td Sun Jul 6 15:45:41 2008
@@ -118,7 +118,8 @@
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr, Intrinsic IntId> {
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -131,11 +132,7 @@
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1,
- (v1i64 (bitconvert
- (v2i32 (vector_shuffle immAllZerosV,
- (v2i32 (scalar_to_vector (i32 imm:$src2))),
- MMX_MOVL_shuffle_mask))))))]>;
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
}
}
@@ -167,7 +164,7 @@
"movd\t{$src, $dst|$dst, $src}", []>;
let neverHasSideEffects = 1 in
-def MMX_MOVD64from64rr : MMXRI<0x6E, MRMSrcReg, (outs GR64:$dst), (ins VR64:$src),
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg, (outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
let neverHasSideEffects = 1 in
@@ -184,13 +181,16 @@
def MMX_MOVDQ2Qrr : MMXID<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
"movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v1i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))]>;
+ (v1i64 (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))]>;
def MMX_MOVQ2DQrr : MMXIS<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (bitconvert (v1i64 VR64:$src)))]>;
+ [(set VR128:$dst,
+ (v2i64 (vector_shuffle immAllZerosV,
+ (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))),
+ MOVL_shuffle_mask)))]>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
@@ -200,18 +200,14 @@
// movd to MMX register zero-extends
def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (v2i32 (vector_shuffle immAllZerosV,
- (v2i32 (scalar_to_vector GR32:$src)),
- MMX_MOVL_shuffle_mask)))]>;
+ [(set VR64:$dst,
+ (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (v2i32 (vector_shuffle immAllZerosV,
- (v2i32 (scalar_to_vector
- (loadi32 addr:$src))),
- MMX_MOVL_shuffle_mask)))]>;
+ [(set VR64:$dst,
+ (v2i32 (X86vzmovl (v2i32
+ (scalar_to_vector (loadi32 addr:$src))))))]>;
// Arithmetic Instructions
@@ -280,23 +276,29 @@
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
- int_x86_mmx_psrl_w>;
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
- int_x86_mmx_psrl_d>;
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
- int_x86_mmx_psrl_q>;
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
- int_x86_mmx_psll_w>;
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
- int_x86_mmx_psll_d>;
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
- int_x86_mmx_psll_q>;
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
- int_x86_mmx_psra_w>;
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
- int_x86_mmx_psra_d>;
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+
+// Shift up / down and insert zeros.
+def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSLLQri VR64:$src, imm:$amt))>;
+def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSRLQri VR64:$src, imm:$amt))>;
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
@@ -526,20 +528,30 @@
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
def : Pat<(store (v2i32 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v2f32 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
def : Pat<(store (v1i64 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
// Bit convert.
def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
@@ -548,6 +560,8 @@
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v2f32 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
@@ -556,6 +570,8 @@
(MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v2f32 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
@@ -564,14 +580,10 @@
// Move scalar to XMM zero-extended
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
- def : Pat<(v8i8 (vector_shuffle immAllZerosV_bc,
- (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
- MMX_MOVL_shuffle_mask)),
- (MMX_MOVZDI2PDIrr GR32:$src)>;
- def : Pat<(v4i16 (vector_shuffle immAllZerosV_bc,
- (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
- MMX_MOVL_shuffle_mask)),
- (MMX_MOVZDI2PDIrr GR32:$src)>;
+ def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+ def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
}
// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
@@ -635,3 +647,19 @@
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
(load addr:$src2))),
(MMX_PANDNrm VR64:$src1, addr:$src2)>;
+
+// Move MMX to lower 64-bit of XMM
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+// Move lower 64-bit of XMM to MMX.
+def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86InstrSSE.td?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86InstrSSE.td Sun Jul 6 15:45:41 2008
@@ -47,6 +47,12 @@
def X86insrtps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -157,6 +163,22 @@
return getI32Imm(N->getValue() >> 3);
}]>;
+def SSE_CC_imm : SDNodeXForm<cond, [{
+ unsigned Val;
+ switch (N->get()) {
+ default: Val = 0; assert(0 && "Unexpected CondCode"); break;
+ case ISD::SETOEQ: Val = 0; break;
+ case ISD::SETOLT: Val = 1; break;
+ case ISD::SETOLE: Val = 2; break;
+ case ISD::SETUO: Val = 3; break;
+ case ISD::SETONE: Val = 4; break;
+ case ISD::SETOGE: Val = 5; break;
+ case ISD::SETOGT: Val = 6; break;
+ case ISD::SETO: Val = 7; break;
+ }
+ return getI8Imm(Val);
+}]>;
+
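The eight cases above mirror the 3-bit predicate field of cmpps/cmppd
(0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD). As a minimal
standalone sketch of the same table (CondToSSEImm is a hypothetical helper
written purely for illustration, not part of this patch):

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    #include <cassert>

    // Map an ordered/unordered FP condition code to the cmpps/cmppd
    // immediate, mirroring the SSE_CC_imm xform above.
    static unsigned CondToSSEImm(llvm::ISD::CondCode CC) {
      switch (CC) {
      case llvm::ISD::SETOEQ: return 0; // cmpeqps
      case llvm::ISD::SETOLT: return 1; // cmpltps
      case llvm::ISD::SETOLE: return 2; // cmpleps
      case llvm::ISD::SETUO:  return 3; // cmpunordps
      case llvm::ISD::SETONE: return 4; // cmpneqps
      case llvm::ISD::SETOGE: return 5; // cmpnltps
      case llvm::ISD::SETOGT: return 6; // cmpnleps
      case llvm::ISD::SETO:   return 7; // cmpordps
      default: assert(0 && "Unexpected CondCode"); return 0;
      }
    }

With this in place, a v4f32 vsetcc with SETOLT, for example, selects to
CMPPSrri with immediate 1, i.e. cmpltps.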
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
@@ -251,6 +273,7 @@
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
+
//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//
@@ -521,31 +544,36 @@
}
// Scalar operation, reg+mem.
- def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+ def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f32mem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
// Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
// Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
// Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
// Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
+ def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (F32Int VR128:$src1,
sse_load_f32:$src2))]>;
@@ -582,46 +610,53 @@
}
// Scalar operation, reg+mem.
- def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+ def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f32mem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
// Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
// Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
// Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
// Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
+ def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (F32Int VR128:$src1,
sse_load_f32:$src2))]>;
// Vector intrinsic operation, reg+reg.
- def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
// Vector intrinsic operation, reg+mem.
- def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, (load addr:$src2)))]>;
+ [(set VR128:$dst, (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>;
}
}
@@ -671,20 +706,21 @@
def MOVLPSrm : PSI<0x12, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (vector_shuffle VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
- MOVLP_shuffle_mask)))]>;
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+ MOVLP_shuffle_mask)))]>;
def MOVHPSrm : PSI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (vector_shuffle VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
- MOVHP_shuffle_mask)))]>;
+ [(set VR128:$dst,
+ (v4f32 (vector_shuffle VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
+ MOVHP_shuffle_mask)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
+
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -783,7 +819,7 @@
// Vector intrinsic operation, mem
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (load addr:$src)))]>;
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
// Square root.
@@ -850,16 +886,20 @@
let Constraints = "$src1 = $dst" in {
def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- VR128:$src, imm:$cc))]>;
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ VR128:$src, imm:$cc))]>;
def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- (load addr:$src), imm:$cc))]>;
-}
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ (memop addr:$src), imm:$cc))]>;
+}
+def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), VR128:$src2, cond:$cc)),
+ (CMPPSrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
+def : Pat<(v4i32 (vsetcc (v4f32 VR128:$src1), (memop addr:$src2), cond:$cc)),
+ (CMPPSrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
@@ -1007,10 +1047,11 @@
let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV_bc,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))),
- MOVL_shuffle_mask)))]>;
+ [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
+ (loadf32 addr:$src))))))]>;
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (MOVZSS2PSrm addr:$src)>;
//===----------------------------------------------------------------------===//
// SSE2 Instructions
@@ -1074,14 +1115,14 @@
def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvtpd2pi
- (load addr:$src)))]>;
+ (memop addr:$src)))]>;
def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
"cvttpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvttpd2pi
- (load addr:$src)))]>;
+ (memop addr:$src)))]>;
def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"cvtpi2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
@@ -1180,26 +1221,32 @@
// Alias bitwise logical operations using SSE logical ops on packed FP values.
let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in {
- def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
"andpd\t{$src2, $dst|$dst, $src2}",
[(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
- def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
"orpd\t{$src2, $dst|$dst, $src2}",
[(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
- def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
"xorpd\t{$src2, $dst|$dst, $src2}",
[(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
}
-def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
+def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
"andpd\t{$src2, $dst|$dst, $src2}",
[(set FR64:$dst, (X86fand FR64:$src1,
(memopfsf64 addr:$src2)))]>;
-def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
+def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
"orpd\t{$src2, $dst|$dst, $src2}",
[(set FR64:$dst, (X86for FR64:$src1,
(memopfsf64 addr:$src2)))]>;
-def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
+def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
"xorpd\t{$src2, $dst|$dst, $src2}",
[(set FR64:$dst, (X86fxor FR64:$src1,
(memopfsf64 addr:$src2)))]>;
@@ -1298,46 +1345,54 @@
}
// Scalar operation, reg+mem.
- def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
+ def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
// Vector operation, reg+reg.
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
// Vector operation, reg+mem.
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
// Intrinsic operation, reg+reg.
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
// Intrinsic operation, reg+mem.
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (F64Int VR128:$src1,
sse_load_f64:$src2))]>;
// Vector intrinsic operation, reg+reg.
- def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
// Vector intrinsic operation, reg+mem.
- def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, (load addr:$src2)))]>;
+ [(set VR128:$dst, (V2F64Int VR128:$src1,
+ (memopv2f64 addr:$src2)))]>;
}
}
@@ -1442,7 +1497,7 @@
def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (load addr:$src)))]>;
+ (memop addr:$src)))]>;
// SSE2 packed instructions with XS prefix
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1451,7 +1506,7 @@
def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (load addr:$src)))]>,
+ (memop addr:$src)))]>,
XS, Requires<[HasSSE2]>;
// SSE2 packed instructions with XD prefix
@@ -1462,7 +1517,7 @@
def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (load addr:$src)))]>,
+ (memop addr:$src)))]>,
XD, Requires<[HasSSE2]>;
def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1471,14 +1526,14 @@
def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (load addr:$src)))]>;
+ (memop addr:$src)))]>;
// SSE2 instructions without OpSize prefix
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
TB, Requires<[HasSSE2]>;
-def Int_CVTPS2PDrm : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins f64mem:$src),
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
@@ -1487,10 +1542,10 @@
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
-def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins f128mem:$src),
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (load addr:$src)))]>;
+ (memop addr:$src)))]>;
// Match intrinsics which expect XMM operand(s).
// Aliases for intrinsics
@@ -1594,7 +1649,7 @@
// Vector intrinsic operation, mem
def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int (load addr:$src)))]>;
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
// Square root.
@@ -1663,13 +1718,17 @@
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- VR128:$src, imm:$cc))]>;
+ VR128:$src, imm:$cc))]>;
def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- (load addr:$src), imm:$cc))]>;
+ (memop addr:$src), imm:$cc))]>;
}
+def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), VR128:$src2, cond:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, (SSE_CC_imm cond:$cc))>;
+def : Pat<(v2i64 (vsetcc (v2f64 VR128:$src1), (memop addr:$src2), cond:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, (SSE_CC_imm cond:$cc))>;
// Shuffle and unpack instructions
let Constraints = "$src1 = $dst" in {
@@ -1774,6 +1833,21 @@
(bitconvert (memopv2i64 addr:$src2))))]>;
}
+multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr,
+ Intrinsic IntId, Intrinsic IntId2> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+}
+
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
@@ -1848,64 +1922,24 @@
defm PSADBW : PDI_binop_rm_int<0xE0, "psadbw", int_x86_sse2_psad_bw, 1>;
-defm PSLLW : PDI_binop_rm_int<0xF1, "psllw", int_x86_sse2_psll_w>;
-defm PSLLD : PDI_binop_rm_int<0xF2, "pslld", int_x86_sse2_psll_d>;
-defm PSLLQ : PDI_binop_rm_int<0xF3, "psllq", int_x86_sse2_psll_q>;
-
-defm PSRLW : PDI_binop_rm_int<0xD1, "psrlw", int_x86_sse2_psrl_w>;
-defm PSRLD : PDI_binop_rm_int<0xD2, "psrld", int_x86_sse2_psrl_d>;
-defm PSRLQ : PDI_binop_rm_int<0xD3, "psrlq", int_x86_sse2_psrl_q>;
-
-defm PSRAW : PDI_binop_rm_int<0xE1, "psraw", int_x86_sse2_psra_w>;
-defm PSRAD : PDI_binop_rm_int<0xE2, "psrad", int_x86_sse2_psra_d>;
-
-// Some immediate variants need to match a bit_convert.
-let Constraints = "$src1 = $dst" in {
-def PSLLWri : PDIi8<0x71, MRM6r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "psllw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
- (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
-def PSLLDri : PDIi8<0x72, MRM6r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "pslld\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
- (scalar_to_vector (i32 imm:$src2))))]>;
-def PSLLQri : PDIi8<0x73, MRM6r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "psllq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
- (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
-
-def PSRLWri : PDIi8<0x71, MRM2r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "psrlw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
- (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
-def PSRLDri : PDIi8<0x72, MRM2r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "psrld\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
- (scalar_to_vector (i32 imm:$src2))))]>;
-def PSRLQri : PDIi8<0x73, MRM2r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "psrlq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
- (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
-
-def PSRAWri : PDIi8<0x71, MRM4r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "psraw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
- (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
-def PSRADri : PDIi8<0x72, MRM4r, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "psrad\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
- (scalar_to_vector (i32 imm:$src2))))]>;
-}
-
-// PSRAQ doesn't exist in SSE[1-3].
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
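Each of these defms now instantiates three records from PDI_binop_rmi_int
above: a reg/reg form, a reg/mem form, and a reg/imm form. PSLLW, for
example, expands to PSLLWrr, PSLLWrm and PSLLWri, with the immediate form
matched through the new pslli/psrli/psrai intrinsics rather than the
hand-written scalar_to_vector/bit_convert patterns this hunk deletes.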
// 128-bit logical shifts.
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
@@ -1925,6 +1959,12 @@
(v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
}
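A note on the shift amounts: the X86vshl / X86vshr nodes carry their amount
in bits, while pslldq / psrldq shift by whole bytes, so the PSxLDQ_imm xform
(defined earlier as getValue() >> 3) divides the amount by eight before it
is emitted as the immediate. A full 64-bit lane shift, for example, becomes
pslldq $8.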
// Logical
@@ -1954,6 +1994,33 @@
defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETEQ)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETEQ)),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETEQ)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETEQ)),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETEQ)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETEQ)),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), VR128:$src2, SETGT)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (vsetcc (v16i8 VR128:$src1), (memop addr:$src2), SETGT)),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), VR128:$src2, SETGT)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (vsetcc (v8i16 VR128:$src1), (memop addr:$src2), SETGT)),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), VR128:$src2, SETGT)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (vsetcc (v4i32 VR128:$src1), (memop addr:$src2), SETGT)),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
+
+
// Pack instructions
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
@@ -2279,56 +2346,59 @@
// Move to the lower bits of a VR128, zeroing the upper bits.
// Loading from memory automatically zeroes the upper bits.
-let AddedComplexity = 20 in
- def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (vector_shuffle immAllZerosV_bc,
- (v2f64 (scalar_to_vector
- (loadf64 addr:$src))),
- MOVL_shuffle_mask)))]>;
+let AddedComplexity = 20 in {
+def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
+ (loadf64 addr:$src))))))]>;
+
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
+}
// movd / movq to XMM register zero-extends
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (vector_shuffle immAllZerosV,
- (v4i32 (scalar_to_vector GR32:$src)),
- MOVL_shuffle_mask)))]>;
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>;
// This is X86-64 only.
def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (vector_shuffle immAllZerosV_bc,
- (v2i64 (scalar_to_vector GR64:$src)),
- MOVL_shuffle_mask)))]>;
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>;
}
-// Handle the v2f64 form of 'MOVZQI2PQIrr' for PR2108. FIXME: this would be
-// better written as a dag combine xform.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
- (v2f64 (scalar_to_vector
- (f64 (bitconvert GR64:$src)))),
- MOVL_shuffle_mask)),
- (MOVZQI2PQIrr GR64:$src)>, Requires<[HasSSE2]>;
-
-
let AddedComplexity = 20 in {
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (vector_shuffle immAllZerosV,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))),
- MOVL_shuffle_mask)))]>;
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (vector_shuffle immAllZerosV_bc,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))),
- MOVL_shuffle_mask)))]>, XS,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>, XS,
Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
}
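All of these load forms can share one instruction because movq from memory
is inherently zero-extending: it loads 64 bits into the low quadword and
clears the upper half of the XMM register. Hence both an X86vzmovl wrapped
around a 128-bit load and the dedicated X86vzload node select to
MOVZQI2PQIrm.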
// Move from XMM to XMM while clearing the upper 64 bits. Note, there is a bug in
@@ -2336,19 +2406,20 @@
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
- VR128:$src,
- MOVL_shuffle_mask)))]>,
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
-let AddedComplexity = 20 in
+let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
- (memopv2i64 addr:$src),
- MOVL_shuffle_mask)))]>,
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE3 Instructions
//===----------------------------------------------------------------------===//
@@ -2400,7 +2471,7 @@
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"addsubps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
- (load addr:$src2)))]>;
+ (memop addr:$src2)))]>;
def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"addsubpd\t{$src2, $dst|$dst, $src2}",
@@ -2410,7 +2481,7 @@
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"addsubpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
- (load addr:$src2)))]>;
+ (memop addr:$src2)))]>;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -2425,7 +2496,7 @@
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
: S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
: S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -2433,7 +2504,7 @@
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
: S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
let Constraints = "$src1 = $dst" in {
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
@@ -2674,13 +2745,13 @@
defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw",
int_x86_ssse3_phadd_w,
- int_x86_ssse3_phadd_w_128, 1>;
+ int_x86_ssse3_phadd_w_128>;
defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd",
int_x86_ssse3_phadd_d,
- int_x86_ssse3_phadd_d_128, 1>;
+ int_x86_ssse3_phadd_d_128>;
defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw",
int_x86_ssse3_phadd_sw,
- int_x86_ssse3_phadd_sw_128, 1>;
+ int_x86_ssse3_phadd_sw_128>;
defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw",
int_x86_ssse3_phsub_w,
int_x86_ssse3_phsub_w_128>;
@@ -2692,7 +2763,7 @@
int_x86_ssse3_phsub_sw_128>;
defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw,
- int_x86_ssse3_pmadd_ub_sw_128, 1>;
+ int_x86_ssse3_pmadd_ub_sw_128>;
defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
int_x86_ssse3_pmul_hr_sw_128, 1>;
@@ -2717,7 +2788,7 @@
(int_x86_ssse3_palign_r
VR64:$src1, VR64:$src2,
imm:$src3))]>;
- def PALIGNR64rm : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i16imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst,
@@ -2733,7 +2804,7 @@
(int_x86_ssse3_palign_r_128
VR128:$src1, VR128:$src2,
imm:$src3))]>, OpSize;
- def PALIGNR128rm : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i32imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
@@ -2793,12 +2864,12 @@
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
- (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (vector_shuffle immAllZerosV_bc,
- (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
}
// Splat v2f64 / v2i64
@@ -2903,33 +2974,66 @@
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
-def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memop addr:$src2),
MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memop addr:$src2),
MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memop addr:$src2),
MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memop addr:$src2),
MOVHP_shuffle_mask)),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)),
MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memop addr:$src2),
MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)),
MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
- MOVLP_shuffle_mask)),
- (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memop addr:$src2),
+ MOVHP_shuffle_mask)),
+ (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
+// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
+def : Pat<(store (v4f32 (vector_shuffle (memop addr:$src1), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2f64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4f32 (vector_shuffle (memop addr:$src1), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
+ (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2f64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
+ (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(store (v4i32 (vector_shuffle
+ (bc_v4i32 (memopv2i64 addr:$src1)), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2i64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+ MOVLP_shuffle_mask)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4i32 (vector_shuffle
+ (bc_v4i32 (memopv2i64 addr:$src1)), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
+ (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2i64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+ MOVHP_shuffle_mask)), addr:$src1),
+ (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
@@ -2953,37 +3057,8 @@
def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
MOVL_shuffle_mask)),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-
-
-// FIXME: Temporary workaround since 2-wide shuffle is broken.
-def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
- (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_loadh_pd VR128:$src1, addr:$src2),
- (v2f64 (MOVHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_loadl_pd VR128:$src1, addr:$src2),
- (v2f64 (MOVLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
- (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
- Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (load addr:$src2), imm:$src3),
- (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
- Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
- (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (load addr:$src2)),
- (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
- (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (load addr:$src2)),
- (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
- (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (load addr:$src2)),
- (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
- (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (load addr:$src2)),
- (PUNPCKLQDQrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
// Some special case pandn patterns.
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
@@ -2997,13 +3072,13 @@
(PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))),
+ (memop addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- (memopv2i64 addr:$src2))),
+ (memop addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- (memopv2i64 addr:$src2))),
+ (memop addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
// vector -> vector casts
@@ -3079,7 +3154,8 @@
(outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int (load addr:$src1),imm:$src2))]>,
+ [(set VR128:$dst,
+ (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
OpSize;
// Intrinsic operation, reg.
@@ -3111,7 +3187,8 @@
(outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int (load addr:$src1),imm:$src2))]>,
+ [(set VR128:$dst,
+ (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
OpSize;
}
@@ -3178,19 +3255,18 @@
int_x86_sse41_pmaxud, 1>;
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
int_x86_sse41_pmaxuw, 1>;
-defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq",
- int_x86_sse41_pmuldq, 1>;
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic IntId128, bit Commutable = 0> {
+ multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
+ SDNode OpNode, Intrinsic IntId128,
+ bit Commutable = 0> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode (v4i32 VR128:$src1),
- VR128:$src2))]>, OpSize {
+ [(set VR128:$dst, (OpNode (OpVT VR128:$src1),
+ VR128:$src2))]>, OpSize {
let isCommutable = Commutable;
}
def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
@@ -3204,17 +3280,19 @@
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
- (OpNode VR128:$src1, (memopv4i32 addr:$src2)))]>, OpSize;
+ (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize;
def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
- (IntId128 VR128:$src1, (memopv4i32 addr:$src2)))]>,
+ (IntId128 VR128:$src1, (memop addr:$src2)))]>,
OpSize;
}
}
-defm PMULLD : SS41I_binop_patint<0x40, "pmulld", mul,
+defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
int_x86_sse41_pmulld, 1>;
+defm PMULDQ : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul,
+ int_x86_sse41_pmuldq, 1>;
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
@@ -3252,7 +3330,7 @@
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
int_x86_sse41_dppd, 1>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
- int_x86_sse41_mpsadbw, 0>;
+ int_x86_sse41_mpsadbw, 1>;
/// SS41I_ternary_int - SSE 4.1 ternary operator
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.cpp Sun Jul 6 15:45:41 2008
@@ -15,20 +15,109 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelAsmPrinter.h"
+#include "X86InstrInfo.h"
#include "X86TargetAsmInfo.h"
#include "X86.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData *TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ unsigned argNum = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI, ++argNum) {
+ const Type* Ty = AI->getType();
+
+    // 'Dereference' the type in the case of a byval parameter attribute
+ if (F->paramHasAttr(argNum, ParamAttr::ByVal))
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+    // Round each argument's size up to a DWORD (4-byte) boundary
+ Size += ((TD->getABITypeSize(Ty) + 3)/4)*4;
+ }
+
+  // Argument areas never approach 4GB, so truncating Size to unsigned is safe.
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
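
The per-argument computation above is the usual round-up-to-alignment
idiom, ((n + 3) / 4) * 4. A tiny self-contained illustration (roundUpToDword
and the sizes in main are hypothetical, written only to show the
arithmetic):

    #include <cassert>
    #include <cstdint>

    // Round a byte size up to the next DWORD (4-byte) boundary, as
    // calculateFunctionInfo does for each stack argument.
    static uint64_t roundUpToDword(uint64_t Size) {
      return ((Size + 3) / 4) * 4;
    }

    int main() {
      assert(roundUpToDword(1) == 4);  // an i8 argument still takes a full slot
      assert(roundUpToDword(6) == 8);  // e.g. a 6-byte struct passed byval
      assert(roundUpToDword(8) == 8);  // already-aligned sizes are unchanged
      return 0;
    }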
+
+
+/// decorateName - Query FunctionInfoMap and use this information for
+/// various forms of name decoration.
+void X86IntelAsmPrinter::decorateName(std::string &Name,
+ const GlobalValue *GV) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ unsigned CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+
+ const X86MachineFunctionInfo *Info;
+ if (info_item == FunctionInfoMap.end()) {
+    // Calculate the appropriate function info and populate the map
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData());
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ switch (Info->getDecorationStyle()) {
+ case None:
+ break;
+ case StdCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+ break;
+ case FastCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+
+ if (Name[0] == '_')
+ Name[0] = '@';
+ else
+ Name = '@' + Name;
+
+ break;
+ default:
+ assert(0 && "Unsupported DecorationStyle");
+ }
+}
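
To make the decoration rules concrete: assuming the mangler has already
produced the name _f for a function whose arguments occupy 8 bytes (the
name and size here are hypothetical), the StdCall case yields _f@8, while
the FastCall case additionally rewrites the leading underscore, yielding
@f@8.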
+
+
std::string X86IntelAsmPrinter::getSectionForFunction(const Function &F) const {
// Intel asm always emits functions to _text.
return "_text";
@@ -53,7 +142,7 @@
if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
- X86SharedAsmPrinter::decorateName(CurrentFnName, F);
+ decorateName(CurrentFnName, F);
SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
@@ -62,18 +151,18 @@
default: assert(0 && "Unsupported linkage type!");
case Function::InternalLinkage:
EmitAlignment(FnAlign);
- break;
+ break;
case Function::DLLExportLinkage:
DLLExportedFns.insert(CurrentFnName);
//FALLS THROUGH
case Function::ExternalLinkage:
O << "\tpublic " << CurrentFnName << "\n";
EmitAlignment(FnAlign);
- break;
+ break;
}
-
+
O << CurrentFnName << "\tproc near\n";
-
+
// Print out code for the function.
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
I != E; ++I) {
@@ -113,14 +202,14 @@
}
}
-void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
+void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
const char *Modifier) {
switch (MO.getType()) {
- case MachineOperand::MO_Register: {
+ case MachineOperand::MO_Register: {
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- MVT::ValueType VT = (strcmp(Modifier,"subreg64") == 0) ?
+ MVT VT = (strcmp(Modifier,"subreg64") == 0) ?
MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
((strcmp(Modifier,"subreg16") == 0) ? MVT::i16 :MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
@@ -142,7 +231,7 @@
O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< "_" << MO.getIndex();
return;
- }
+ }
case MachineOperand::MO_ConstantPoolIndex: {
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
if (!isMemOp) O << "OFFSET ";
@@ -159,17 +248,17 @@
case MachineOperand::MO_GlobalAddress: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
- GlobalValue *GV = MO.getGlobal();
+ GlobalValue *GV = MO.getGlobal();
std::string Name = Mang->getValueName(GV);
- X86SharedAsmPrinter::decorateName(Name, GV);
+ decorateName(Name, GV);
if (!isMemOp && !isCallOp) O << "OFFSET ";
if (GV->hasDLLImportLinkage()) {
// FIXME: This should be fixed with full support of stdcall & fastcall
// CC's
- O << "__imp_";
- }
+ O << "__imp_";
+ }
O << Name;
int Offset = MO.getOffset();
if (Offset > 0)
@@ -235,11 +324,11 @@
O << "]";
}
-void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
const MachineBasicBlock *MBB) const {
if (!TAI->getSetDirective())
return;
-
+
O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
<< getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
printBasicBlockLabel(MBB, false, false, false);
@@ -277,12 +366,12 @@
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool X86IntelAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
+ unsigned AsmVariant,
const char *ExtraCode) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
+
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
case 'b': // Print QImode register
@@ -292,14 +381,14 @@
return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
}
}
-
+
printOperand(MI, OpNo);
return false;
}
bool X86IntelAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
- unsigned AsmVariant,
+ unsigned AsmVariant,
const char *ExtraCode) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
@@ -318,8 +407,8 @@
}
bool X86IntelAsmPrinter::doInitialization(Module &M) {
- bool Result = X86SharedAsmPrinter::doInitialization(M);
-
+ bool Result = AsmPrinter::doInitialization(M);
+
Mang->markCharUnacceptable('.');
O << "\t.686\n\t.model flat\n\n";
@@ -328,15 +417,15 @@
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->isDeclaration()) {
std::string Name = Mang->getValueName(I);
- X86SharedAsmPrinter::decorateName(Name, I);
+ decorateName(Name, I);
O << "\textern " ;
if (I->hasDLLImportLinkage()) {
O << "__imp_";
- }
+ }
O << Name << ":near\n";
}
-
+
// Emit declarations for external globals. Note that VC++ always declares
// external globals to have type byte, and if that's good enough for VC++...
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
@@ -347,7 +436,7 @@
O << "\textern " ;
if (I->hasDLLImportLinkage()) {
O << "__imp_";
- }
+ }
O << Name << ":byte\n";
}
}
@@ -362,17 +451,18 @@
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (I->isDeclaration()) continue; // External global require no code
-
+
// Check to see if this is a special global used by LLVM, if so, emit it.
if (EmitSpecialLLVMGlobal(I))
continue;
-
+
std::string name = Mang->getValueName(I);
Constant *C = I->getInitializer();
unsigned Align = TD->getPreferredAlignmentLog(I);
bool bCustomSegment = false;
switch (I->getLinkage()) {
+ case GlobalValue::CommonLinkage:
case GlobalValue::LinkOnceLinkage:
case GlobalValue::WeakLinkage:
SwitchToDataSection("");
@@ -414,8 +504,7 @@
}
// Output linker support code for dllexported globals
- if (!DLLExportedGVs.empty() ||
- !DLLExportedFns.empty()) {
+ if (!DLLExportedGVs.empty() || !DLLExportedFns.empty()) {
SwitchToDataSection("");
O << "; WARNING: The following code is valid only with MASM v8.x and (possibly) higher\n"
<< "; This version of MASM is usually shipped with Microsoft Visual Studio 2005\n"
@@ -424,23 +513,19 @@
O << "_drectve\t segment info alias('.drectve')\n";
}
- for (std::set<std::string>::iterator i = DLLExportedGVs.begin(),
+ for (StringSet<>::iterator i = DLLExportedGVs.begin(),
e = DLLExportedGVs.end();
- i != e; ++i) {
- O << "\t db ' /EXPORT:" << *i << ",data'\n";
- }
+ i != e; ++i)
+ O << "\t db ' /EXPORT:" << i->getKeyData() << ",data'\n";
- for (std::set<std::string>::iterator i = DLLExportedFns.begin(),
+ for (StringSet<>::iterator i = DLLExportedFns.begin(),
e = DLLExportedFns.end();
- i != e; ++i) {
- O << "\t db ' /EXPORT:" << *i << "'\n";
- }
-
- if (!DLLExportedGVs.empty() ||
- !DLLExportedFns.empty()) {
- O << "_drectve\t ends\n";
- }
-
+ i != e; ++i)
+ O << "\t db ' /EXPORT:" << i->getKeyData() << "'\n";
+
+ if (!DLLExportedGVs.empty() || !DLLExportedFns.empty())
+ O << "_drectve\t ends\n";
+
// Bypass X86SharedAsmPrinter::doFinalization().
bool Result = AsmPrinter::doFinalization(M);
SwitchToDataSection("");
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86IntelAsmPrinter.h Sun Jul 6 15:45:41 2008
@@ -14,16 +14,19 @@
#ifndef X86INTELASMPRINTER_H
#define X86INTELASMPRINTER_H
-#include "X86AsmPrinter.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
-struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public X86SharedAsmPrinter {
+struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
X86IntelAsmPrinter(std::ostream &O, X86TargetMachine &TM,
const TargetAsmInfo *T)
- : X86SharedAsmPrinter(O, TM, T) {
+ : AsmPrinter(O, TM, T) {
}
virtual const char *getPassName() const {
@@ -110,12 +113,31 @@
bool runOnMachineFunction(MachineFunction &F);
bool doInitialization(Module &M);
bool doFinalization(Module &M);
-
+
+  // We have to propagate some information about the MachineFunction to the
+  // AsmPrinter. That is fine while we are printing the function itself, since
+  // we have access to the MachineFunction and can get the appropriate
+  // MachineFunctionInfo. Unfortunately, that is not possible when we are only
+  // printing a reference to a Function (e.g. when calling it); moreover, there
+  // is no way to get the corresponding MachineFunction then, as it may not
+  // even have been created yet. That is why we collect all the necessary
+  // information in an additional structure up front.
+  //
+  // This structure is used, e.g., for name decoration of stdcall and fastcall
+  // functions, since the decoration depends on the arguments' size.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+ void decorateName(std::string& Name, const GlobalValue* GV);
+
/// getSectionForFunction - Return the section that we should emit the
/// specified function body into.
virtual std::string getSectionForFunction(const Function &F) const;
virtual void EmitString(const ConstantArray *CVA) const;
+
+ // Necessary for dllexport support
+ StringSet<> DLLExportedFns, DLLExportedGVs;
};
} // end namespace llvm
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86JITInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86JITInfo.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86JITInfo.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86JITInfo.cpp Sun Jul 6 15:45:41 2008
@@ -51,6 +51,13 @@
#define GETASMPREFIX(X) GETASMPREFIX2(X)
#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+// When building with -fPIC on Linux, calls to globals must go through the PLT.
+#if defined(__PIC__) && __PIC__ && defined(__linux__)
+#define ASMCALLSUFFIX "@PLT"
+#else
+#define ASMCALLSUFFIX
+#endif
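
On Linux with -fPIC, where __USER_LABEL_PREFIX__ expands to nothing, the
string pasting used below therefore produces

    call X86CompilationCallback2@PLT

and every other configuration keeps the plain direct call.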
+
// Provide a convenient way for disabling usage of CFI directives.
// This is needed for old/broken assemblers (for example, gas on
// Darwin is pretty old and doesn't support these directives)
@@ -112,7 +119,7 @@
// JIT callee
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
"movaps 96(%rsp), %xmm6\n"
@@ -186,7 +193,7 @@
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
"movl %ebp, %esp\n" // Restore ESP
CFI(".cfi_def_cfa_register %esp\n")
"subl $12, %esp\n"
@@ -240,7 +247,7 @@
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
"addl $16, %esp\n"
"movaps 48(%esp), %xmm3\n"
CFI(".cfi_restore %xmm3\n")
@@ -396,8 +403,8 @@
MachineCodeEmitter &MCE) {
#if defined (X86_64_JIT)
MCE.startFunctionStub(GV, 8, 8);
- MCE.emitWordLE(((unsigned *)&ptr)[0]);
- MCE.emitWordLE(((unsigned *)&ptr)[1]);
+ MCE.emitWordLE((unsigned)(intptr_t)ptr);
+ MCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32));
#else
MCE.startFunctionStub(GV, 4, 4);
MCE.emitWordLE((intptr_t)ptr);
@@ -420,8 +427,8 @@
MCE.startFunctionStub(F, 13, 4);
MCE.emitByte(0x49); // REX prefix
MCE.emitByte(0xB8+2); // movabsq r10
- MCE.emitWordLE(((unsigned *)&Fn)[0]);
- MCE.emitWordLE(((unsigned *)&Fn)[1]);
+ MCE.emitWordLE((unsigned)(intptr_t)Fn);
+ MCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
MCE.emitByte(0x41); // REX prefix
MCE.emitByte(0xFF); // jmpq *r10
MCE.emitByte(2 | (4 << 3) | (3 << 6));
@@ -437,8 +444,8 @@
MCE.startFunctionStub(F, 14, 4);
MCE.emitByte(0x49); // REX prefix
MCE.emitByte(0xB8+2); // movabsq r10
- MCE.emitWordLE(((unsigned *)&Fn)[0]);
- MCE.emitWordLE(((unsigned *)&Fn)[1]);
+ MCE.emitWordLE((unsigned)(intptr_t)Fn);
+ MCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
MCE.emitByte(0x41); // REX prefix
MCE.emitByte(0xFF); // callq *r10
MCE.emitByte(2 | (2 << 3) | (3 << 6));
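The stub-emission hunks above replace reads of the pointer's storage, i.e.
((unsigned *)&ptr)[0], with explicit shifts. The shift form always yields the
low and high 32-bit halves in that order, independent of host byte order and
without aliasing the pointer through an unrelated type. A small sketch with a
made-up value:

    #include <cassert>
    #include <cstdint>

    int main() {
      std::uint64_t V = 0x1122334455667788ULL;  // pretend pointer bits
      unsigned Lo = (unsigned)V;                // emitted first (emitWordLE)
      unsigned Hi = (unsigned)(V >> 32);        // emitted second
      assert(Lo == 0x55667788u && Hi == 0x11223344u);
      return 0;
    }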
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86MachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86MachineFunctionInfo.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86MachineFunctionInfo.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86MachineFunctionInfo.h Sun Jul 6 15:45:41 2008
@@ -53,20 +53,27 @@
/// the return address can be safely moved to this area
int TailCallReturnAddrDelta;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
DecorationStyle(None),
ReturnAddrIndex(0),
- TailCallReturnAddrDelta(0) {}
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0) {}
X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
DecorationStyle(None),
ReturnAddrIndex(0),
- TailCallReturnAddrDelta(0) {}
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -85,6 +92,9 @@
int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
};
} // End llvm namespace
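For context on SRetReturnReg: on x86-32, some ABIs require a function that
receives a hidden sret pointer to also return that pointer (in EAX). A
source-level sketch of the behavior being modeled, with hypothetical names:

    #include <cassert>

    struct Big { int V[8]; };

    static Big *fillBig(Big *SRet) {
      for (int i = 0; i < 8; ++i) SRet->V[i] = i;
      return SRet;   // models "the sret pointer comes back in EAX"
    }

    int main() {
      Big B;
      assert(fillBig(&B) == &B);
      return 0;
    }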
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.cpp Sun Jul 6 15:45:41 2008
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -36,6 +37,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
using namespace llvm;
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
@@ -249,6 +251,19 @@
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
+static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
+ unsigned MaxAlign = 0;
+ for (int i = FFI->getObjectIndexBegin(),
+ e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ unsigned Align = FFI->getObjectAlignment(i);
+ MaxAlign = std::max(MaxAlign, Align);
+ }
+
+ return MaxAlign;
+}
+
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
@@ -257,16 +272,59 @@
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
- return (NoFramePointerElim ||
+ return (NoFramePointerElim ||
+ needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
(MMI && MMI->callsUnwindInit()));
}
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas
+ return (RealignStack &&
+ (MFI->getMaxAlignment() > StackAlign &&
+ !MFI->hasVarSizedObjects()));
+}
+
bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+int
+X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
+ uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+
+ if (needsStackRealignment(MF)) {
+ if (FI < 0)
+ // Skip the saved EBP
+ Offset += SlotSize;
+ else {
+ unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+ assert( (-(Offset + StackSize)) % Align == 0);
+ return Offset + StackSize;
+ }
+
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP
+ Offset += SlotSize;
+
+ // Skip the RETADDR move area
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
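A simplified model of the new getFrameIndexOffset: with realignment, fixed
objects (negative frame indices) stay EBP-relative while locals are addressed
from the realigned ESP as Offset + StackSize. The helper below is
hypothetical; it fixes SlotSize at 4 and ignores the tail-call RETADDR area:

    #include <cstdint>

    static std::int64_t frameIndexOffset(bool Realigned, bool HasFP, int FI,
                                         std::int64_t ObjOffset,
                                         std::int64_t StackSize) {
      const std::int64_t SlotSize = 4;      // the saved return address
      std::int64_t Offset = ObjOffset + SlotSize;
      if (Realigned)
        return FI < 0 ? Offset + SlotSize   // skip the saved EBP
                      : Offset + StackSize; // ESP-relative local
      if (!HasFP)
        return Offset + StackSize;
      return Offset + SlotSize;             // skip the saved EBP
    }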
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -333,28 +391,38 @@
}
int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr;
+ if (needsStackRealignment(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else
+ BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
+
// This must be part of a four operand memory reference. Replace the
// FrameIndex with the appropriate base register. Add an offset to the offset.
- MI.getOperand(i).ChangeToRegister(hasFP(MF) ? FramePtr : StackPtr, false);
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
// Now add the frame object offset to the offset from EBP.
- int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+3).getImm()+SlotSize;
+ int64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
+ MI.getOperand(i+3).getImm();
- if (!hasFP(MF))
- Offset += MF.getFrameInfo()->getStackSize();
- else {
- Offset += SlotSize; // Skip the saved EBP
- // Skip the RETADDR move area
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
- }
-
MI.getOperand(i+3).ChangeToImmediate(Offset);
}
void
+X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
+ calculateMaxStackAlignment(FFI));
+
+ FFI->setMaxAlignment(MaxAlign);
+}
+
+void
X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -413,7 +481,7 @@
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
if (MBBI == MBB.begin()) return;
-
+
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
@@ -437,12 +505,12 @@
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
return;
-
+
if (MBBI == MBB.end()) return;
-
+
MachineBasicBlock::iterator NI = next(MBBI);
if (NI == MBB.end()) return;
-
+
unsigned Opc = NI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
@@ -462,12 +530,12 @@
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted
+/// instruction. If it is an ADD/SUB instruction it is deleted
/// argument and the stack adjustment is returned as a positive value for ADD
-/// and a negative for SUB.
+/// and a negative for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr,
+ unsigned StackPtr,
bool doMergeWithPrevious) {
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
@@ -491,11 +559,85 @@
Offset -= PI->getOperand(2).getImm();
MBB.erase(PI);
if (!doMergeWithPrevious) MBBI = NI;
- }
+ }
return Offset;
}
+void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
+ unsigned FrameLabelId,
+ unsigned ReadyLabelId) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ if (!MMI)
+ return;
+
+ uint64_t StackSize = MFI->getStackSize();
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+
+ // Calculate the number of bytes used for storing the return address.
+ int stackGrowth =
+ (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize());
+
+ if (StackSize) {
+ // Show update of SP.
+ if (hasFP(MF)) {
+ // Adjust SP
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize+stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+ } else {
+ // FIXME: Verify & implement for FP.
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ // FIXME: This is a dirty hack. The code itself is a mess right now.
+ // It should be rewritten from scratch and generalized someday.
+
+ // Determine the maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (unsigned I = 0, E = CSI.size(); I!=E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(CSI[I].getFrameIdx()));
+
+ // Calculate offsets
+ int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth;
+ for (unsigned I = 0, E = CSI.size(); I!=E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ Offset = (MaxOffset-Offset+saveAreaOffset);
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ if (hasFP(MF)) {
+ // Save FP
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+}
+
+
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -504,57 +646,72 @@
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
- bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
+ bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
!Fn->doesNotThrow() ||
UnwindTablesMandatory;
-
// Prepare for frame info.
unsigned FrameLabelId = 0;
-
+
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
+ // Get desired stack alignment
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
+ if (TailCallReturnAddrDelta < 0)
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta));
- uint64_t NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
// Insert stack pointer adjustment for later moving of return addr. Only
// applies to tail call optimized functions where the callee argument stack
// size is bigger than the callers.
if (TailCallReturnAddrDelta < 0) {
- BuildMI(MBB, MBBI, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+ BuildMI(MBB, MBBI, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
}
+ uint64_t NumBytes = 0;
if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
// Update the frame offset adjustment.
- MFI->setOffsetAdjustment(SlotSize-NumBytes);
+ MFI->setOffsetAdjustment(-NumBytes);
// Save EBP into the appropriate stack slot...
BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(FramePtr);
- NumBytes -= SlotSize;
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, TII.get(X86::LABEL)).addImm(FrameLabelId).addImm(0);
+ BuildMI(MBB, MBBI, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
}
// Update EBP with the new base value...
BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr);
- }
-
+
+ // Realign stack
+ if (needsStackRealignment(MF))
+ BuildMI(MBB, MBBI,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+ StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+ } else
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+
unsigned ReadyLabelId = 0;
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer is ready.
ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, TII.get(X86::LABEL)).addImm(ReadyLabelId).addImm(0);
+ BuildMI(MBB, MBBI, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
}
// Skip the callee-saved push instructions.
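The prologue arithmetic above, in isolation: the frame size is rounded up to
a multiple of MaxAlign, and ANDing the stack pointer with -MaxAlign clears
its low bits (MaxAlign is a power of two). A standalone sketch with made-up
values:

    #include <cassert>
    #include <cstdint>

    int main() {
      std::uint64_t FrameSize = 100, MaxAlign = 32;
      std::uint64_t Rounded = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
      assert(Rounded == 128);                       // next multiple of 32

      std::intptr_t SP = 0x7fffABCD;                // hypothetical ESP value
      SP &= -static_cast<std::intptr_t>(MaxAlign);  // the emitted AND
      assert(SP % MaxAlign == 0);
      return 0;
    }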
@@ -575,8 +732,8 @@
Reg == X86::AH || Reg == X86::AL);
}
- // Function prologue calls _alloca to probe the stack when allocating
- // more than 4k bytes in one go. Touching the stack at 4K increments is
+ // Function prologue calls _alloca to probe the stack when allocating
+ // more than 4k bytes in one go. Touching the stack at 4K increments is
// necessary to ensure that the guard pages used by the OS virtual memory
// manager are allocated in correct sequence.
if (!isEAXAlive) {
@@ -593,7 +750,7 @@
.addExternalSymbol("_alloca");
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(TII.get(X86::MOV32rm),X86::EAX),
- StackPtr, NumBytes-4);
+ StackPtr, false, NumBytes-4);
MBB.insert(MBBI, MI);
}
} else {
@@ -604,95 +761,20 @@
// If there is an ADD32ri or SUB32ri of ESP immediately after this
// instruction, merge the two instructions.
mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
+
if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
}
- if (needsFrameMoves) {
- std::vector<MachineMove> &Moves = MMI->getFrameMoves();
- const TargetData *TD = MF.getTarget().getTargetData();
-
- // Calculate amount of bytes used for return address storing
- int stackGrowth =
- (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsUp ?
- TD->getPointerSize() : -TD->getPointerSize());
-
- if (StackSize) {
- // Show update of SP.
- if (hasFP(MF)) {
- // Adjust SP
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- } else {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP,
- -StackSize+stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- }
- } else {
- //FIXME: Verify & implement for FP
- MachineLocation SPDst(StackPtr);
- MachineLocation SPSrc(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
- }
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
- // FIXME: This is dirty hack. The code itself is pretty mess right now.
- // It should be rewritten from scratch and generalized sometimes.
-
- // Determine maximum offset (minumum due to stack growth)
- int64_t MaxOffset = 0;
- for (unsigned I = 0, E = CSI.size(); I!=E; ++I)
- MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(CSI[I].getFrameIdx()));
-
- // Calculate offsets
- int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth;
- for (unsigned I = 0, E = CSI.size(); I!=E; ++I) {
- int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- Offset = (MaxOffset-Offset+saveAreaOffset);
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
- }
-
- if (hasFP(MF)) {
- // Save FP
- MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
- MachineLocation FPSrc(FramePtr);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
- }
-
- MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
- }
-
- // If it's main() on Cygwin\Mingw32 we should align stack as well
- if (Fn->hasExternalLinkage() && Fn->getName() == "main" &&
- Subtarget->isTargetCygMing()) {
- BuildMI(MBB, MBBI, TII.get(X86::AND32ri), X86::ESP)
- .addReg(X86::ESP).addImm(-StackAlign);
-
- // Probe the stack
- BuildMI(MBB, MBBI, TII.get(X86::MOV32ri), X86::EAX).addImm(StackAlign);
- BuildMI(MBB, MBBI, TII.get(X86::CALLpcrel32)).addExternalSymbol("_alloca");
- }
+ if (needsFrameMoves)
+ emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function* Fn = MF.getFunction();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
MachineBasicBlock::iterator MBBI = prior(MBB.end());
unsigned RetOpcode = MBBI->getOpcode();
@@ -713,16 +795,25 @@
// Get the number of bytes to allocate from the FrameInfo
uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
- uint64_t NumBytes = StackSize - CSSize;
+ uint64_t NumBytes = 0;
if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
// pop EBP.
BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
- NumBytes -= SlotSize;
- }
+ } else
+ NumBytes = StackSize - CSSize;
// Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
@@ -738,35 +829,42 @@
mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
// If dynamic alloca is used, then reset esp to point to the last callee-saved
- // slot before popping them off! Also, if it's main() on Cygwin/Mingw32 we
- // aligned stack in the prologue, - revert stack changes back. Note: we're
- // assuming, that frame pointer was forced for main()
- if (MFI->hasVarSizedObjects() ||
- (Fn->hasExternalLinkage() && Fn->getName() == "main" &&
- Subtarget->isTargetCygMing())) {
- unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ // slot before popping them off! The same applies when the stack was
+ // realigned.
+ if (needsStackRealignment(MF)) {
+ // We cannot use LEA here, because the stack pointer was realigned. We need
+ // to deallocate the local frame first.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
MachineInstr *MI = addRegOffset(BuildMI(TII.get(Opc), StackPtr),
- FramePtr, -CSSize);
+ FramePtr, false, -CSSize);
MBB.insert(MBBI, MI);
} else
BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),StackPtr).
addReg(FramePtr);
- NumBytes = 0;
+ } else {
+ // Adjust the stack pointer back: ESP += NumBytes.
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
}
- // adjust stack pointer back: ESP += numbytes
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
-
// We're returning from function via eh_return.
if (RetOpcode == X86::EH_RETURN) {
MBBI = prior(MBB.end());
MachineOperand &DestAddr = MBBI->getOperand(0);
assert(DestAddr.isRegister() && "Offset should be in register!");
BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),StackPtr).
- addReg(DestAddr.getReg());
+ addReg(DestAddr.getReg());
// Tail call return: adjust the stack pointer and jump to callee
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
@@ -774,7 +872,7 @@
MachineOperand &JumpTarget = MBBI->getOperand(0);
MachineOperand &StackAdjust = MBBI->getOperand(1);
assert( StackAdjust.isImmediate() && "Expecting immediate value.");
-
+
// Adjust stack pointer.
int StackAdj = StackAdjust.getImm();
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
@@ -787,7 +885,7 @@
// Check for a possible merge with the preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
- }
+ }
// Jump to label or value in register.
if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64)
BuildMI(MBB, MBBI, TII.get(X86::TAILJMPd)).
@@ -798,7 +896,7 @@
BuildMI(MBB, MBBI, TII.get(X86::TAILJMPr), JumpTarget.getReg());
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
- } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
(X86FI->getTCReturnAddrDelta() < 0)) {
// Add the return addr area delta back since we are not tail calling.
int delta = -1*X86FI->getTCReturnAddrDelta();
@@ -820,20 +918,6 @@
return hasFP(MF) ? FramePtr : StackPtr;
}
-int
-X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
- int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
- if (!hasFP(MF))
- return Offset + MF.getFrameInfo()->getStackSize();
-
- Offset += SlotSize; // Skip the saved EBP
- // Skip the RETADDR move area
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
- return Offset;
-}
-
void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
const {
// Calculate the number of bytes used for storing the return address.
@@ -861,8 +945,8 @@
}
namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, MVT::ValueType VT, bool High) {
- switch (VT) {
+unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) {
+ switch (VT.getSimpleVT()) {
default: return Reg;
case MVT::i8:
if (High) {
@@ -1030,3 +1114,37 @@
#include "X86GenRegisterInfo.inc"
+namespace {
+ struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass {
+ static char ID;
+ MSAC() : MachineFunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+
+ // Calculate max stack alignment of all already allocated stack objects.
+ unsigned MaxAlign = calculateMaxStackAlignment(FFI);
+
+ // Be over-conservative: scan over all vreg defs and check whether vector
+ // registers are used. If so, there is a chance that a vector register will
+ // be spilled, and then the stack needs to be aligned properly.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+ FFI->setMaxAlignment(MaxAlign);
+
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Maximal Stack Alignment Calculator";
+ }
+ };
+
+ char MSAC::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
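A toy restatement of what the pass computes, under the over-conservative
assumption spelled out in its comment (any virtual register might be spilled,
so register-class alignment counts toward the stack's requirement). Inputs
here are made up:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    static unsigned maxStackAlign(const std::vector<unsigned> &ObjAligns,
                                  const std::vector<unsigned> &VRegAligns) {
      unsigned MaxAlign = 0;
      for (unsigned A : ObjAligns)
        MaxAlign = std::max(MaxAlign, A);
      for (unsigned A : VRegAligns)
        MaxAlign = std::max(MaxAlign, A);
      return MaxAlign;
    }

    int main() {
      // Scalar stack objects only, but a vector register class (e.g. v4f32)
      // forces a 16-byte requirement.
      assert(maxStackAlign({4, 8}, {16}) == 16);
      return 0;
    }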
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.h Sun Jul 6 15:45:41 2008
@@ -115,6 +115,8 @@
bool hasFP(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
bool hasReservedCallFrame(MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -125,10 +127,15 @@
int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitFrameMoves(MachineFunction &MF,
+ unsigned FrameLabelId, unsigned ReadyLabelId) const;
+
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(MachineFunction &MF) const;
@@ -143,7 +150,7 @@
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
// register of a specific X86 register.
// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
-unsigned getX86SubSuperRegister(unsigned, MVT::ValueType, bool High=false);
+unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false);
} // End llvm namespace
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.td?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86RegisterInfo.td Sun Jul 6 15:45:41 2008
@@ -509,7 +509,7 @@
}
// Generic vector registers: VR64 and VR128.
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
+def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.cpp Sun Jul 6 15:45:41 2008
@@ -16,9 +16,10 @@
#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-cl::opt<X86Subtarget::AsmWriterFlavorTy>
+static cl::opt<X86Subtarget::AsmWriterFlavorTy>
AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset),
cl::desc("Choose style of code to emit from X86 backend:"),
cl::values(
@@ -26,10 +27,6 @@
clEnumValN(X86Subtarget::Intel, "intel", " Emit Intel-style assembly"),
clEnumValEnd));
-cl::opt<unsigned>
-StackAlignment("stack-alignment", cl::init(0),
- cl::desc("Override default stack alignment"));
-
/// True if accessing the GV requires an extra load. For Windows, dllimported
/// symbols are indirect, loading the value at address GV rather than the
@@ -44,11 +41,15 @@
if (isTargetDarwin()) {
return (!isDirectCall &&
(GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() ||
(GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode())));
} else if (isTargetELF()) {
- // Extra load is needed for all non-statics.
- return (!isDirectCall &&
- (GV->isDeclaration() || !GV->hasInternalLinkage()));
+ // Extra load is needed for all externally visible.
+ if (isDirectCall)
+ return false;
+ if (GV->hasInternalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
} else if (isTargetCygMing() || isTargetWindows()) {
return (GV->hasDLLImportLinkage());
}
@@ -244,12 +245,13 @@
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
, DarwinVers(0)
+ , IsLinux(false)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, Is64Bit(is64Bit)
, TargetType(isELF) { // Default to ELF unless otherwise specified.
-
+
// Determine default and user specified characteristics
if (!FS.empty()) {
// If feature string is not empty, parse features string.
@@ -281,6 +283,10 @@
DarwinVers = atoi(&TT[Pos+7]);
else
DarwinVers = 8; // Minimum supported darwin is Tiger.
+ } else if (TT.find("linux") != std::string::npos) {
+ // Linux doesn't imply ELF, but we don't currently support anything else.
+ TargetType = isELF;
+ IsLinux = true;
} else if (TT.find("cygwin") != std::string::npos) {
TargetType = isCygwin;
} else if (TT.find("mingw") != std::string::npos) {
@@ -305,6 +311,10 @@
#elif defined(_WIN32) || defined(_WIN64)
TargetType = isWindows;
+#elif defined(__linux__)
+ // Linux doesn't imply ELF, but we don't currently support anything else.
+ TargetType = isELF;
+ IsLinux = true;
#endif
}
@@ -315,11 +325,9 @@
? X86Subtarget::Intel : X86Subtarget::ATT;
}
- if (TargetType == isDarwin ||
- TargetType == isCygwin ||
- TargetType == isMingw ||
- TargetType == isWindows ||
- (TargetType == isELF && Is64Bit))
+ // Stack alignment is 16 bytes on Darwin (both 32- and 64-bit) and for all
+ // 64-bit targets.
+ if (TargetType == isDarwin || Is64Bit)
stackAlignment = 16;
if (StackAlignment)
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86Subtarget.h Sun Jul 6 15:45:41 2008
@@ -67,7 +67,10 @@
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
- unsigned char DarwinVers; // Is any darwin-ppc platform.
+ unsigned char DarwinVers; // Is any darwin-x86 platform.
+
+ /// isLinux - true if this is a "linux" platform.
+ bool IsLinux;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
@@ -132,7 +135,9 @@
bool isFlavorIntel() const { return AsmFlavor == Intel; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
- bool isTargetELF() const { return TargetType == isELF; }
+ bool isTargetELF() const {
+ return TargetType == isELF;
+ }
bool isTargetWindows() const { return TargetType == isWindows; }
bool isTargetMingw() const { return TargetType == isMingw; }
bool isTargetCygMing() const { return (TargetType == isMingw ||
@@ -164,6 +169,9 @@
/// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
unsigned getDarwinVers() const { return DarwinVers; }
+ /// isLinux - Return true if the target is "Linux".
+ bool isLinux() const { return IsLinux; }
+
/// True if accessing the GV requires an extra load. For Windows, dllimported
/// symbols are indirect, loading the value at address GV rather than the
/// value of GV itself. This means that the GlobalAddress must be in the base
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86TargetAsmInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86TargetAsmInfo.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86TargetAsmInfo.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86TargetAsmInfo.cpp Sun Jul 6 15:45:41 2008
@@ -66,6 +66,7 @@
ReadOnlySection = "\t.const\n";
LCOMMDirective = "\t.lcomm\t";
SwitchToSectionDirective = "\t.section ";
+ StringConstantPrefix = "\1LC";
COMMDirectiveTakesAlignment = false;
HasDotTypeDotSizeDirective = false;
if (TM.getRelocationModel() == Reloc::Static) {
@@ -120,8 +121,6 @@
GlobalEHDirective = "\t.globl\t";
SupportsWeakOmittedEHFrame = false;
AbsoluteEHSectionOffsets = false;
- if (Subtarget->is64Bit())
- ShortenEHDataOn64Bit = true;
DwarfEHFrameSection =
".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
@@ -229,6 +228,10 @@
SectionEndDirectiveSuffix = "\tends\n";
}
+ // On Linux we must declare when we can use a non-executable stack.
+ if (Subtarget->isLinux())
+ NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+
AssemblerDialect = Subtarget->getAsmFlavor();
}
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.cpp Sun Jul 6 15:45:41 2008
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Transforms/Scalar.h"
using namespace llvm;
/// X86TargetMachineModule - Note that this is used on hosts that cannot link
@@ -30,13 +29,11 @@
extern "C" int X86TargetMachineModule;
int X86TargetMachineModule = 0;
-namespace {
- // Register the target.
- RegisterTarget<X86_32TargetMachine>
- X("x86", " 32-bit X86: Pentium-Pro and above");
- RegisterTarget<X86_64TargetMachine>
- Y("x86-64", " 64-bit X86: EM64T and AMD64");
-}
+// Register the target.
+static RegisterTarget<X86_32TargetMachine>
+X("x86", " 32-bit X86: Pentium-Pro and above");
+static RegisterTarget<X86_64TargetMachine>
+Y("x86-64", " 64-bit X86: EM64T and AMD64");
const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
return new X86TargetAsmInfo(*this);
@@ -161,6 +158,13 @@
return false;
}
+bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, bool Fast) {
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ PM.add(createX86MaxStackAlignmentCalculatorPass());
+ return false; // -print-machineinstr shouldn't print after this.
+}
+
bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool Fast) {
PM.add(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
@@ -175,10 +179,8 @@
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, bool Fast,
bool DumpAsm, MachineCodeEmitter &MCE) {
// FIXME: Move this to TargetJITInfo!
- if (DefRelocModel == Reloc::Default) {
+ if (DefRelocModel == Reloc::Default)
setRelocationModel(Reloc::Static);
- Subtarget.setPICStyle(PICStyle::None);
- }
// JIT cannot ensure globals are placed in the lower 4G of address.
if (Subtarget.is64Bit())
Modified: llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.h?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.h (original)
+++ llvm/branches/non-call-eh/lib/Target/X86/X86TargetMachine.h Sun Jul 6 15:45:41 2008
@@ -44,12 +44,12 @@
virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
- virtual TargetJITInfo *getJITInfo() { return &JITInfo; }
- virtual const TargetSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual X86JITInfo *getJITInfo() { return &JITInfo; }
+ virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
virtual X86TargetLowering *getTargetLowering() const {
return const_cast<X86TargetLowering*>(&TLInfo);
}
- virtual const TargetRegisterInfo *getRegisterInfo() const {
+ virtual const X86RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -61,7 +61,8 @@
static unsigned getJITMatchQuality();
// Set up the pass pipeline.
- virtual bool addInstSelector(PassManagerBase &PM, bool Fast);
+ virtual bool addInstSelector(PassManagerBase &PM, bool Fast);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, bool Fast);
virtual bool addPostRegAlloc(PassManagerBase &PM, bool Fast);
virtual bool addAssemblyEmitter(PassManagerBase &PM, bool Fast,
std::ostream &Out);
Modified: llvm/branches/non-call-eh/lib/Transforms/Hello/Hello.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Hello/Hello.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Hello/Hello.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Hello/Hello.cpp Sun Jul 6 15:45:41 2008
@@ -36,10 +36,12 @@
return false;
}
};
+}
- char Hello::ID = 0;
- RegisterPass<Hello> X("hello", "Hello World Pass");
+char Hello::ID = 0;
+static RegisterPass<Hello> X("hello", "Hello World Pass");
+namespace {
// Hello2 - The second implementation with getAnalysisUsage implemented.
struct Hello2 : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
@@ -58,7 +60,8 @@
AU.setPreservesAll();
};
};
- char Hello2::ID = 0;
- RegisterPass<Hello2> Y("hello2",
- "Hello World Pass (with getAnalysisUsage implemented)");
}
+
+char Hello2::ID = 0;
+static RegisterPass<Hello2>
+Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/ArgumentPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/ArgumentPromotion.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/ArgumentPromotion.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/ArgumentPromotion.cpp Sun Jul 6 15:45:41 2008
@@ -66,7 +66,8 @@
virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
static char ID; // Pass identification, replacement for typeid
- ArgPromotion(unsigned maxElements = 3) : CallGraphSCCPass((intptr_t)&ID), maxElements(maxElements) {}
+ ArgPromotion(unsigned maxElements = 3) : CallGraphSCCPass((intptr_t)&ID),
+ maxElements(maxElements) {}
private:
bool PromoteArguments(CallGraphNode *CGN);
@@ -74,15 +75,15 @@
Function *DoPromotion(Function *F,
SmallPtrSet<Argument*, 8> &ArgsToPromote,
SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
- /// The maximum number of elements to expand, or 0 for unlimited.
- unsigned maxElements;
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned maxElements;
};
-
- char ArgPromotion::ID = 0;
- RegisterPass<ArgPromotion> X("argpromotion",
- "Promote 'by reference' arguments to scalars");
}
+char ArgPromotion::ID = 0;
+static RegisterPass<ArgPromotion>
+X("argpromotion", "Promote 'by reference' arguments to scalars");
+
Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
}
@@ -147,16 +148,16 @@
Argument *PtrArg = PointerArgs[i].first;
if (isByVal) {
const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
- if (const StructType *STy = dyn_cast<StructType>(AgTy))
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
DOUT << "argpromotion disable promoting argument '"
<< PtrArg->getName() << "' because it would require adding more "
<< "than " << maxElements << " arguments to the function.\n";
} else {
- // If all the elements are first class types, we can promote it.
+ // If all the elements are single-value types, we can promote it.
bool AllSimple = true;
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- if (!STy->getElementType(i)->isFirstClassType()) {
+ if (!STy->getElementType(i)->isSingleValueType()) {
AllSimple = false;
break;
}
@@ -169,6 +170,7 @@
continue;
}
}
+ }
}
// Otherwise, see if we can promote the pointer to its value.
@@ -260,8 +262,9 @@
}
// Ensure that all of the indices are constants.
SmallVector<ConstantInt*, 8> Operands;
- for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i)
- if (ConstantInt *C = dyn_cast<ConstantInt>(GEP->getOperand(i)))
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
Operands.push_back(C);
else
return false; // Not a constant operand GEP!
@@ -290,7 +293,7 @@
<< Arg->getName() << "' because it would require adding more "
<< "than " << maxElements << " arguments to the function.\n";
// We limit aggregate promotion to only promoting up to a fixed number
- // of elements of the aggregate.
+ // of elements of the aggregate.
return false;
}
GEPIndices.push_back(Operands);
@@ -477,15 +480,13 @@
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
- NF->setCallingConv(F->getCallingConv());
+ NF->copyAttributesFrom(F);
// Recompute the parameter attributes list based on the new arguments for
// the function.
NF->setParamAttrs(PAListPtr::get(ParamAttrsVec.begin(), ParamAttrsVec.end()));
ParamAttrsVec.clear();
-
- if (F->hasCollector())
- NF->setCollector(F->getCollector());
+
F->getParent()->getFunctionList().insert(F, NF);
NF->takeName(F);
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/ConstantMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/ConstantMerge.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/ConstantMerge.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/ConstantMerge.cpp Sun Jul 6 15:45:41 2008
@@ -38,11 +38,12 @@
//
bool runOnModule(Module &M);
};
-
- char ConstantMerge::ID = 0;
- RegisterPass<ConstantMerge>X("constmerge","Merge Duplicate Global Constants");
}
+char ConstantMerge::ID = 0;
+static RegisterPass<ConstantMerge>
+X("constmerge", "Merge Duplicate Global Constants");
+
ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
bool ConstantMerge::runOnModule(Module &M) {
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/DeadArgumentElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/DeadArgumentElimination.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/DeadArgumentElimination.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/DeadArgumentElimination.cpp Sun Jul 6 15:45:41 2008
@@ -97,9 +97,13 @@
void RemoveDeadArgumentsFromFunction(Function *F);
};
- char DAE::ID = 0;
- RegisterPass<DAE> X("deadargelim", "Dead Argument Elimination");
+}
+
+char DAE::ID = 0;
+static RegisterPass<DAE>
+X("deadargelim", "Dead Argument Elimination");
+namespace {
/// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
/// deletes arguments to functions which are external. This is only for use
/// by bugpoint.
@@ -107,11 +111,12 @@
static char ID;
virtual bool ShouldHackArguments() const { return true; }
};
- char DAH::ID = 0;
- RegisterPass<DAH> Y("deadarghaX0r",
- "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)");
}
+char DAH::ID = 0;
+static RegisterPass<DAH>
+Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)");
+
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
///
@@ -158,10 +163,7 @@
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, Fn.getLinkage());
- NF->setCallingConv(Fn.getCallingConv());
- NF->setParamAttrs(Fn.getParamAttrs());
- if (Fn.hasCollector())
- NF->setCollector(Fn.getCollector());
+ NF->copyAttributesFrom(&Fn);
Fn.getParent()->getFunctionList().insert(&Fn, NF);
NF->takeName(&Fn);
@@ -300,6 +302,12 @@
FunctionIntrinsicallyLive = true;
else
for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) {
+ // If the function is PASSED IN as an argument, its address has been taken
+ if (I.getOperandNo() != 0) {
+ FunctionIntrinsicallyLive = true;
+ break;
+ }
+
// If this use is anything other than a call site, the function is alive.
CallSite CS = CallSite::get(*I);
Instruction *TheCall = CS.getInstruction();
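The hoisted check relies on the LLVM 2.x operand layout, where a CallInst's
operand 0 is the callee: a use of F in any other operand slot means F is
being passed as data, so its address escapes and every argument must be kept.
A toy model with made-up types:

    #include <cassert>
    #include <vector>

    struct Use { unsigned OperandNo; };  // which operand slot the use occupies

    static bool addressTaken(const std::vector<Use> &Uses) {
      for (const Use &U : Uses)
        if (U.OperandNo != 0)
          return true;                   // appears as a call argument
      return false;
    }

    int main() {
      assert(!addressTaken({{0}, {0}})); // only ever the callee
      assert(addressTaken({{0}, {2}}));  // also passed in as an argument
      return 0;
    }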
@@ -327,15 +335,6 @@
RetValLiveness = Live;
break;
}
-
- // If the function is PASSED IN as an argument, its address has been taken
- for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
- AI != E; ++AI)
- if (AI->get() == &F) {
- FunctionIntrinsicallyLive = true;
- break;
- }
- if (FunctionIntrinsicallyLive) break;
}
if (FunctionIntrinsicallyLive) {
@@ -551,10 +550,8 @@
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, F->getLinkage());
- NF->setCallingConv(F->getCallingConv());
+ NF->copyAttributesFrom(F);
NF->setParamAttrs(NewPAL);
- if (F->hasCollector())
- NF->setCollector(F->getCollector());
F->getParent()->getFunctionList().insert(F, NF);
NF->takeName(F);
@@ -626,7 +623,7 @@
// Finally, remove the old call from the program, reducing the use-count of
// F.
- Call->getParent()->getInstList().erase(Call);
+ Call->eraseFromParent();
}
// Since we have now created the new function, splice the body of the old
@@ -665,7 +662,7 @@
}
// Now that the old function is dead, delete it.
- F->getParent()->getFunctionList().erase(F);
+ F->eraseFromParent();
}
bool DAE::runOnModule(Module &M) {
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/DeadTypeElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/DeadTypeElimination.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/DeadTypeElimination.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/DeadTypeElimination.cpp Sun Jul 6 15:45:41 2008
@@ -43,10 +43,11 @@
AU.addRequired<FindUsedTypes>();
}
};
- char DTE::ID = 0;
- RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
}
+char DTE::ID = 0;
+static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
+
ModulePass *llvm::createDeadTypeEliminationPass() {
return new DTE();
}
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/ExtractGV.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/ExtractGV.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/ExtractGV.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/ExtractGV.cpp Sun Jul 6 15:45:41 2008
@@ -123,10 +123,7 @@
if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
Function *New = Function::Create(I->getFunctionType(),
GlobalValue::ExternalLinkage);
- New->setCallingConv(I->getCallingConv());
- New->setParamAttrs(I->getParamAttrs());
- if (I->hasCollector())
- New->setCollector(I->getCollector());
+ New->copyAttributesFrom(I);
// If it's not the named function, delete the body of the function
I->dropAllReferences();
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalDCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalDCE.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalDCE.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalDCE.cpp Sun Jul 6 15:45:41 2008
@@ -49,10 +49,11 @@
bool SafeToDestroyConstant(Constant* C);
bool RemoveUnusedGlobalValue(GlobalValue &GV);
};
- char GlobalDCE::ID = 0;
- RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination");
}
+char GlobalDCE::ID = 0;
+static RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination");
+
ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
bool GlobalDCE::runOnModule(Module &M) {
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalOpt.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/GlobalOpt.cpp Sun Jul 6 15:45:41 2008
@@ -28,6 +28,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -67,13 +68,15 @@
bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
};
-
- char GlobalOpt::ID = 0;
- RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
}
+char GlobalOpt::ID = 0;
+static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
+
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
+namespace {
+
/// GlobalStatus - As we analyze each global, keep track of some information
/// about it. If we find out that the address of the global is taken, none of
/// this info will be accurate.
@@ -128,7 +131,7 @@
HasNonInstructionUser(false), HasPHIUser(false) {}
};
-
+}
/// ConstantIsDead - Return true if the specified constant is (transitively)
/// dead. The constant may be used by other constants (e.g. constant arrays and
@@ -455,7 +458,7 @@
/// behavior of the program in a more fine-grained way. We have determined that
/// this transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
@@ -467,8 +470,14 @@
std::vector<GlobalVariable*> NewGlobals;
Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
+ // Get the alignment of the global, either explicit or target-specific.
+ unsigned StartAlignment = GV->getAlignment();
+ if (StartAlignment == 0)
+ StartAlignment = TD.getABITypeAlignment(GV->getType());
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
NewGlobals.reserve(STy->getNumElements());
+ const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
ConstantInt::get(Type::Int32Ty, i));
@@ -480,19 +489,28 @@
GV->isThreadLocal());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ uint64_t FieldOffset = Layout.getElementOffset(i);
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+ if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+ NGV->setAlignment(NewAlign);
}
} else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
unsigned NumElements = 0;
if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
NumElements = ATy->getNumElements();
- else if (const VectorType *PTy = dyn_cast<VectorType>(STy))
- NumElements = PTy->getNumElements();
else
- assert(0 && "Unknown aggregate sequential type!");
+ NumElements = cast<VectorType>(STy)->getNumElements();
if (NumElements > 16 && GV->hasNUsesOrMore(16))
return 0; // It's not worth it.
NewGlobals.reserve(NumElements);
+
+ uint64_t EltSize = TD.getABITypeSize(STy->getElementType());
+ unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
ConstantInt::get(Type::Int32Ty, i));
@@ -505,6 +523,13 @@
GV->isThreadLocal());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
+
+ // Calculate the known alignment of the field. If the original aggregate
+ // had 256 byte alignment for example, something might depend on that:
+ // propagate info to each field.
+ unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+ if (NewAlign > EltAlign)
+ NGV->setAlignment(NewAlign);
}
}
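Both alignment-propagation loops above hinge on MinAlign: the alignment known
to hold for a field at byte offset Off inside an aggregate aligned to A is
the largest power of two dividing both. A sketch with a hypothetical helper
(the real MinAlign comes from llvm/Support/MathExtras.h, newly included in
this file above):

    #include <cassert>
    #include <cstdint>

    static unsigned minAlign(unsigned A, std::uint64_t Off) {
      while (Off % A != 0)  // A is a power of two, so this terminates at 1
        A /= 2;
      return A;
    }

    int main() {
      assert(minAlign(16, 0) == 16);  // first field keeps full alignment
      assert(minAlign(16, 4) == 4);   // a field at offset 4 is only 4-aligned
      return 0;
    }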
@@ -679,8 +704,9 @@
// Should handle GEP here.
SmallVector<Constant*, 8> Idxs;
Idxs.reserve(GEPI->getNumOperands()-1);
- for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
- if (Constant *C = dyn_cast<Constant>(GEPI->getOperand(i)))
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(*i))
Idxs.push_back(C);
else
break;
@@ -804,6 +830,9 @@
GV->getName()+".body",
(Module *)NULL,
GV->isThreadLocal());
+ // FIXME: This new global should have the alignment returned by malloc. Code
+ // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+ // this would only guarantee some lower alignment.
GV->getParent()->getGlobalList().insert(GV, NewGV);
// Anything that used the malloc now uses the global directly.
@@ -844,7 +873,7 @@
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_EQ:
- LV = BinaryOperator::createNot(LV, "notinit", CI);
+ LV = BinaryOperator::CreateNot(LV, "notinit", CI);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGE:
@@ -956,8 +985,8 @@
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
- for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end(); UI != E;
- ++UI) {
+ for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end();
+ UI != E; ++UI) {
// Comparison against null is ok.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
@@ -1054,7 +1083,8 @@
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
- Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx.begin(), GEPIdx.end(),
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr,
+ GEPIdx.begin(), GEPIdx.end(),
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
@@ -1164,7 +1194,7 @@
if (!RunningOr)
RunningOr = Cond; // First seteq
else
- RunningOr = BinaryOperator::createOr(RunningOr, Cond, "tmp", MI);
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI);
}
// Split the basic block at the old malloc.
@@ -1250,9 +1280,10 @@
else if (GetElementPtrInst *GEPI =dyn_cast<GetElementPtrInst>(StoredOnceVal)){
// "getelementptr Ptr, 0, 0, 0" is really just a cast.
bool IsJustACast = true;
- for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(GEPI->getOperand(i)) ||
- !cast<Constant>(GEPI->getOperand(i))->isNullValue()) {
+ for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+ i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
IsJustACast = false;
break;
}
@@ -1464,11 +1495,11 @@
// this global a local variable) we replace the global with a local alloca
// in this function.
//
- // NOTE: It doesn't make sense to promote non first class types since we
+ // NOTE: It doesn't make sense to promote non single-value types since we
// are just replacing static memory with stack memory.
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction && !GS.HasNonInstructionUser &&
- GV->getType()->getElementType()->isFirstClassType() &&
+ GV->getType()->getElementType()->isSingleValueType() &&
GS.AccessingFunction->getName() == "main" &&
GS.AccessingFunction->hasExternalLinkage()) {
DOUT << "LOCALIZING GLOBAL: " << *GV;
@@ -1519,8 +1550,9 @@
++NumMarked;
return true;
- } else if (!GV->getInitializer()->getType()->isFirstClassType()) {
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV)) {
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV,
+ getAnalysis<TargetData>())) {
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
@@ -1576,8 +1608,9 @@
if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
// See if the function address is passed as an argument.
- for (unsigned i = 1, e = User->getNumOperands(); i != e; ++i)
- if (User->getOperand(i) == F) return false;
+ for (User::op_iterator i = User->op_begin() + 1, e = User->op_end();
+ i != e; ++i)
+ if (*i == F) return false;
}
return true;
}
@@ -1682,8 +1715,8 @@
if (!I->hasInitializer()) return 0;
ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
if (!CA) return 0;
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(CA->getOperand(i))) {
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
if (isa<ConstantPointerNull>(CS->getOperand(1)))
continue;
@@ -1710,8 +1743,8 @@
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
std::vector<Function*> Result;
Result.reserve(CA->getNumOperands());
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
- ConstantStruct *CS = cast<ConstantStruct>(CA->getOperand(i));
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
}
return Result;
@@ -1824,8 +1857,8 @@
// Break up the constant into its elements.
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
- for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
- Elts.push_back(CS->getOperand(i));
+ for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
} else if (isa<ConstantAggregateZero>(Init)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
@@ -1852,8 +1885,8 @@
// Break up the array into elements.
std::vector<Constant*> Elts;
if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- Elts.push_back(CA->getOperand(i));
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
} else if (isa<ConstantAggregateZero>(Init)) {
Constant *Elt = Constant::getNullValue(ATy->getElementType());
Elts.assign(ATy->getNumElements(), Elt);
@@ -1981,8 +2014,9 @@
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
Constant *P = getVal(Values, GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
- for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i)
- GEPOps.push_back(getVal(Values, GEP->getOperand(i)));
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i)
+ GEPOps.push_back(getVal(Values, *i));
InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (LI->isVolatile()) return false; // no volatile accesses.
@@ -2006,8 +2040,9 @@
if (!Callee) return false; // Cannot resolve.
std::vector<Constant*> Formals;
- for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
- Formals.push_back(getVal(Values, CI->getOperand(i)));
+ for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
+ i != e; ++i)
+ Formals.push_back(getVal(Values, *i));
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
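
The alignment bookkeeping added to SRAGlobal above reduces to a small
computation: a split-off field is known to be aligned to the largest power of
two dividing both the original global's alignment and the field's byte offset.
A minimal standalone sketch (the MinAlign helper mirrors LLVM's; everything
around it is illustrative only):

  #include <cstdint>
  #include <cstdio>

  // Greatest power of two dividing both A and B, i.e. the lowest set bit
  // of A|B. This mirrors LLVM's MinAlign helper.
  static uint64_t MinAlign(uint64_t A, uint64_t B) {
    return (A | B) & (~(A | B) + 1);
  }

  int main() {
    // A global with 256-byte alignment split at field offset 8: the new
    // global for that field is only guaranteed 8-byte alignment.
    std::printf("%llu\n", (unsigned long long)MinAlign(256, 8));   // 8
    std::printf("%llu\n", (unsigned long long)MinAlign(256, 512)); // 256
    return 0;
  }

Per the hunks above, the computed alignment is applied only when it beats the
ABI alignment of the field's type, since anything weaker is already guaranteed.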
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/IPConstantPropagation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/IPConstantPropagation.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/IPConstantPropagation.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/IPConstantPropagation.cpp Sun Jul 6 15:45:41 2008
@@ -21,6 +21,7 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/Statistic.h"
@@ -42,10 +43,12 @@
bool PropagateConstantsIntoArguments(Function &F);
bool PropagateConstantReturn(Function &F);
};
- char IPCP::ID = 0;
- RegisterPass<IPCP> X("ipconstprop", "Interprocedural constant propagation");
}
+char IPCP::ID = 0;
+static RegisterPass<IPCP>
+X("ipconstprop", "Interprocedural constant propagation");
+
ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
bool IPCP::runOnModule(Module &M) {
@@ -76,156 +79,199 @@
bool IPCP::PropagateConstantsIntoArguments(Function &F) {
if (F.arg_empty() || F.use_empty()) return false; // No arguments or no users? Early exit.
- std::vector<std::pair<Constant*, bool> > ArgumentConstants;
+ // For each argument, keep track of its constant value and whether it is a
+ // constant or not. The bool is set to true once the argument is found
+ // to be non-constant.
+ SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
ArgumentConstants.resize(F.arg_size());
unsigned NumNonconstant = 0;
-
- for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I)
- if (!isa<Instruction>(*I))
- return false; // Used by a non-instruction, do not transform
- else {
- CallSite CS = CallSite::get(cast<Instruction>(*I));
- if (CS.getInstruction() == 0 ||
- CS.getCalledFunction() != &F)
- return false; // Not a direct call site?
-
- // Check out all of the potentially constant arguments
- CallSite::arg_iterator AI = CS.arg_begin();
- Function::arg_iterator Arg = F.arg_begin();
- for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
- ++i, ++AI, ++Arg) {
- if (*AI == &F) return false; // Passes the function into itself
-
- if (!ArgumentConstants[i].second) {
- if (Constant *C = dyn_cast<Constant>(*AI)) {
- if (!ArgumentConstants[i].first)
- ArgumentConstants[i].first = C;
- else if (ArgumentConstants[i].first != C) {
- // Became non-constant
- ArgumentConstants[i].second = true;
- ++NumNonconstant;
- if (NumNonconstant == ArgumentConstants.size()) return false;
- }
- } else if (*AI != &*Arg) { // Ignore recursive calls with same arg
- // This is not a constant argument. Mark the argument as
- // non-constant.
- ArgumentConstants[i].second = true;
- ++NumNonconstant;
- if (NumNonconstant == ArgumentConstants.size()) return false;
- }
- }
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ // If the user is not a call or invoke instruction, or the function is
+ // not its callee operand, do not transform.
+ if (UI.getOperandNo() != 0 ||
+ (!isa<CallInst>(*UI) && !isa<InvokeInst>(*UI)))
+ return false;
+
+ CallSite CS = CallSite::get(cast<Instruction>(*UI));
+
+ // Check out all of the potentially constant arguments. Note that we don't
+ // inspect varargs here.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ Function::arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
+ ++i, ++AI, ++Arg) {
+
+ // If this argument is known non-constant, ignore it.
+ if (ArgumentConstants[i].second)
+ continue;
+
+ Constant *C = dyn_cast<Constant>(*AI);
+ if (C && ArgumentConstants[i].first == 0) {
+ ArgumentConstants[i].first = C; // First constant seen.
+ } else if (C && ArgumentConstants[i].first == C) {
+ // Still the constant value we think it is.
+ } else if (*AI == &*Arg) {
+ // Ignore recursive calls passing argument down.
+ } else {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+ ArgumentConstants[i].second = true;
}
}
+ }
// If we got to this point, there is a constant argument!
assert(NumNonconstant != ArgumentConstants.size());
- Function::arg_iterator AI = F.arg_begin();
bool MadeChange = false;
- for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI)
- // Do we have a constant argument!?
- if (!ArgumentConstants[i].second && !AI->use_empty()) {
- Value *V = ArgumentConstants[i].first;
- if (V == 0) V = UndefValue::get(AI->getType());
- AI->replaceAllUsesWith(V);
- ++NumArgumentsProped;
- MadeChange = true;
- }
+ Function::arg_iterator AI = F.arg_begin();
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
+ // Do we have a constant argument?
+ if (ArgumentConstants[i].second || AI->use_empty())
+ continue;
+
+ Value *V = ArgumentConstants[i].first;
+ if (V == 0) V = UndefValue::get(AI->getType());
+ AI->replaceAllUsesWith(V);
+ ++NumArgumentsProped;
+ MadeChange = true;
+ }
return MadeChange;
}
-// Check to see if this function returns a constant. If so, replace all callers
-// that user the return value with the returned valued. If we can replace ALL
-// callers,
+// Check to see if this function returns one or more constants. If so, replace
+// all callers that use those return values with the constant value. This will
+// leave in the actual return values and instructions, but deadargelim will
+// clean that up.
+//
+// Additionally if a function always returns one of its arguments directly,
+// callers will be updated to use the value they pass in directly instead of
+// using the return value.
bool IPCP::PropagateConstantReturn(Function &F) {
if (F.getReturnType() == Type::VoidTy)
return false; // No return value.
+ // If this function could be overridden later in the link stage, we can't
+ // propagate information about its results into callers.
+ if (F.hasLinkOnceLinkage() || F.hasWeakLinkage())
+ return false;
+
// Check to see if this function returns a constant.
SmallVector<Value *,4> RetVals;
const StructType *STy = dyn_cast<StructType>(F.getReturnType());
if (STy)
- RetVals.assign(STy->getNumElements(), 0);
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
+ RetVals.push_back(UndefValue::get(STy->getElementType(i)));
else
- RetVals.push_back(0);
+ RetVals.push_back(UndefValue::get(F.getReturnType()));
+ unsigned NumNonConstant = 0;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- unsigned RetValsSize = RetVals.size();
- assert (RetValsSize == RI->getNumOperands() && "Invalid ReturnInst operands!");
- for (unsigned i = 0; i < RetValsSize; ++i) {
- if (isa<UndefValue>(RI->getOperand(i))) {
- // Ignore
- } else if (Constant *C = dyn_cast<Constant>(RI->getOperand(i))) {
- Value *RV = RetVals[i];
- if (RV == 0)
- RetVals[i] = C;
- else if (RV != C)
- return false; // Does not return the same constant.
- } else {
- return false; // Does not return a constant.
- }
- }
- }
-
- if (STy) {
- for (unsigned i = 0, e = RetVals.size(); i < e; ++i)
- if (RetVals[i] == 0)
- RetVals[i] = UndefValue::get(STy->getElementType(i));
- } else {
- if (RetVals.size() == 1)
- if (RetVals[0] == 0)
- RetVals[0] = UndefValue::get(F.getReturnType());
- }
-
- // If we got here, the function returns a constant value. Loop over all
- // users, replacing any uses of the return value with the returned constant.
- bool ReplacedAllUsers = true;
- bool MadeChange = false;
- for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I)
- if (!isa<Instruction>(*I))
- ReplacedAllUsers = false;
- else {
- CallSite CS = CallSite::get(cast<Instruction>(*I));
- if (CS.getInstruction() == 0 ||
- CS.getCalledFunction() != &F) {
- ReplacedAllUsers = false;
- } else {
- Instruction *Call = CS.getInstruction();
- if (!Call->use_empty()) {
- if (RetVals.size() == 1)
- Call->replaceAllUsesWith(RetVals[0]);
- else {
- for(Value::use_iterator CUI = Call->use_begin(), CUE = Call->use_end();
- CUI != CUE; ++CUI) {
- GetResultInst *GR = cast<GetResultInst>(CUI);
- if (RetVals[GR->getIndex()]) {
- GR->replaceAllUsesWith(RetVals[GR->getIndex()]);
- GR->eraseFromParent();
- }
+ // The return type does not match the operand type: this is an old-style
+ // multiple return.
+ bool OldReturn = (F.getReturnType() != RI->getOperand(0)->getType());
+
+ for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
+ // Already found conflicting return values?
+ Value *RV = RetVals[i];
+ if (!RV)
+ continue;
+
+ // Find the returned value
+ Value *V;
+ if (!STy || OldReturn)
+ V = RI->getOperand(i);
+ else
+ V = FindInsertedValue(RI->getOperand(0), i);
+
+ if (V) {
+ // Ignore undefs; we can change them into anything.
+ if (isa<UndefValue>(V))
+ continue;
+
+ // Try to see if all the rets return the same constant or argument.
+ if (isa<Constant>(V) || isa<Argument>(V)) {
+ if (isa<UndefValue>(RV)) {
+ // No value found yet? Try the current one.
+ RetVals[i] = V;
+ continue;
}
+ // Returning the same value? Good.
+ if (RV == V)
+ continue;
}
- MadeChange = true;
}
+ // Different or no known return value? Don't propagate this return
+ // value.
+ RetVals[i] = 0;
+ // All values non-constant? Stop looking.
+ if (++NumNonConstant == RetVals.size())
+ return false;
}
}
- // If we replace all users with the returned constant, and there can be no
- // other callers of the function, replace the constant being returned in the
- // function with an undef value.
- if (ReplacedAllUsers && F.hasInternalLinkage()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- for (unsigned i = 0, e = RetVals.size(); i < e; ++i) {
- Value *RetVal = RetVals[i];
- if (isa<UndefValue>(RetVal))
- continue;
- Value *RV = UndefValue::get(RetVal->getType());
- if (RI->getOperand(i) != RV) {
- RI->setOperand(i, RV);
- MadeChange = true;
- }
+ // If we got here, the function returns at least one constant value. Loop
+ // over all users, replacing any uses of the return value with the returned
+ // constant.
+ bool MadeChange = false;
+ for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ CallSite CS = CallSite::get(*UI);
+ Instruction* Call = CS.getInstruction();
+
+ // Not a call instruction or a call instruction that's not calling F
+ // directly?
+ if (!Call || UI.getOperandNo() != 0)
+ continue;
+
+ // Call result not used?
+ if (Call->use_empty())
+ continue;
+
+ MadeChange = true;
+
+ if (STy == 0) {
+ Value* New = RetVals[0];
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Call->replaceAllUsesWith(New);
+ continue;
+ }
+
+ for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
+ I != E;) {
+ Instruction *Ins = dyn_cast<Instruction>(*I);
+
+ // Increment now, so we can remove the use
+ ++I;
+
+ // Not an instruction? Ignore
+ if (!Ins)
+ continue;
+
+ // Find the index of the retval to replace with
+ int index = -1;
+ if (GetResultInst *GR = dyn_cast<GetResultInst>(Ins))
+ index = GR->getIndex();
+ else if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Ins))
+ if (EV->hasIndices())
+ index = *EV->idx_begin();
+
+ // If this use uses a specific return value, and we have a replacement,
+ // replace it.
+ if (index != -1) {
+ Value *New = RetVals[index];
+ if (New) {
+ if (Argument *A = dyn_cast<Argument>(New))
+ // Was an argument returned? Then find the corresponding argument in
+ // the call instruction and use that.
+ New = CS.getArgument(A->getArgNo());
+ Ins->replaceAllUsesWith(New);
+ Ins->eraseFromParent();
}
}
}
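
The rewritten argument scan above is a three-state lattice per argument: no
constant seen yet, one constant seen, or known non-constant, where a
conflicting value drives the entry to non-constant (the real code's
carve-out for a recursive call passing the argument straight through is
omitted here). A minimal sketch of that bookkeeping under assumed names,
with ints standing in for llvm::Constant*:

  #include <cassert>
  #include <cstddef>
  #include <optional>
  #include <vector>

  // Per-argument state: first constant seen (if any) plus a sticky
  // "known non-constant" flag, as in ArgumentConstants above.
  struct ArgInfo {
    std::optional<int> Konst;
    bool NonConstant = false;
  };

  // Merge one call site's actual arguments into the lattice. An empty
  // optional models a non-constant actual.
  static void observeCall(std::vector<ArgInfo> &Args,
                          const std::vector<std::optional<int>> &Actuals) {
    for (std::size_t i = 0; i != Args.size(); ++i) {
      if (Args[i].NonConstant)
        continue;
      if (!Actuals[i])
        Args[i].NonConstant = true;      // non-constant actual
      else if (!Args[i].Konst)
        Args[i].Konst = *Actuals[i];     // first constant seen
      else if (*Args[i].Konst != *Actuals[i])
        Args[i].NonConstant = true;      // conflicting constants
    }
  }

  int main() {
    std::vector<ArgInfo> Args(2);
    observeCall(Args, {1, 2});            // f(1, 2)
    observeCall(Args, {1, std::nullopt}); // f(1, x), x not a constant
    assert(!Args[0].NonConstant && *Args[0].Konst == 1);
    assert(Args[1].NonConstant);
    return 0;
  }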
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/IndMemRemoval.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/IndMemRemoval.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/IndMemRemoval.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/IndMemRemoval.cpp Sun Jul 6 15:45:41 2008
@@ -37,10 +37,11 @@
virtual bool runOnModule(Module &M);
};
- char IndMemRemPass::ID = 0;
- RegisterPass<IndMemRemPass> X("indmemrem","Indirect Malloc and Free Removal");
} // end anonymous namespace
+char IndMemRemPass::ID = 0;
+static RegisterPass<IndMemRemPass>
+X("indmemrem","Indirect Malloc and Free Removal");
bool IndMemRemPass::runOnModule(Module &M) {
// In theory, all direct calls of malloc and free should be promoted
@@ -71,7 +72,7 @@
GlobalValue::LinkOnceLinkage,
"malloc_llvm_bounce", &M);
BasicBlock* bb = BasicBlock::Create("entry",FN);
- Instruction* c = CastInst::createIntegerCast(
+ Instruction* c = CastInst::CreateIntegerCast(
FN->arg_begin(), Type::Int32Ty, false, "c", bb);
Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb);
ReturnInst::Create(a, bb);
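
For context, the bounce function being built above gives indirect callers of
malloc a real function body whose allocation the optimizer can see as a
first-class instruction (the pass emits a MallocInst after casting the size
to i32). Roughly the same shape in plain C++, purely illustrative:

  #include <cstdlib>

  // Stand-in for the "malloc_llvm_bounce" body: takes a 32-bit size and
  // performs the allocation directly, so indirect users of &malloc can
  // be redirected here. Not the pass's actual output.
  extern "C" void *malloc_llvm_bounce(unsigned Size) {
    return std::malloc(Size);
  }

  int main() {
    void *(*FP)(unsigned) = malloc_llvm_bounce; // an indirect use
    void *P = FP(16);
    std::free(P);
    return 0;
  }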
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/InlineSimple.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/InlineSimple.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/InlineSimple.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/InlineSimple.cpp Sun Jul 6 15:45:41 2008
@@ -45,10 +45,12 @@
}
virtual bool doInitialization(CallGraph &CG);
};
- char SimpleInliner::ID = 0;
- RegisterPass<SimpleInliner> X("inline", "Function Integration/Inlining");
}
+char SimpleInliner::ID = 0;
+static RegisterPass<SimpleInliner>
+X("inline", "Function Integration/Inlining");
+
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
Pass *llvm::createFunctionInliningPass(int Threshold) {
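
This hunk is one instance of a pattern applied throughout the patch: the
pass's ID definition and its RegisterPass object move out of the anonymous
namespace and become file-scope statics, while the pass class itself stays
hidden. Sketched with a stand-in RegisterPass template (the real one is
LLVM's; this one only mimics the shape):

  // Stand-in registration template; the real constructor has the side
  // effect of adding the pass to the global registry.
  template <typename PassT> struct RegisterPass {
    RegisterPass(const char *Arg, const char *Name) { (void)Arg; (void)Name; }
  };

  namespace {
    struct SimpleInlinerLike { static char ID; }; // class stays hidden
  }

  char SimpleInlinerLike::ID = 0;               // ID defined at file scope
  static RegisterPass<SimpleInlinerLike>        // registration object is a
  X("inline", "Function Integration/Inlining"); // file-scope static

  int main() { (void)X; return SimpleInlinerLike::ID; }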
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/Inliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/Inliner.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/Inliner.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/Inliner.cpp Sun Jul 6 15:45:41 2008
@@ -30,11 +30,9 @@
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
-namespace {
- cl::opt<int>
- InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
+static cl::opt<int>
+InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
cl::desc("Control the amount of inlining to perform (default = 200)"));
-}
Inliner::Inliner(const void *ID)
: CallGraphSCCPass((intptr_t)ID), InlineThreshold(InlineLimit) {}
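
The option moved to a file-scope static above feeds the inliner's threshold,
whose role is simple: a per-call-site cost estimate is compared against the
flat limit. A sketch with an assumed cost value (the cost model itself lives
elsewhere in the inliner and is not shown in this patch):

  // -inline-threshold defaults to 200, per the cl::opt above. The cost
  // passed in here is a stand-in for the inliner's real estimate.
  static const int InlineLimit = 200;

  static bool shouldInline(int EstimatedInlineCost) {
    return EstimatedInlineCost <= InlineLimit;
  }

  int main() {
    return (shouldInline(120) && !shouldInline(350)) ? 0 : 1;
  }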
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/Internalize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/Internalize.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/Internalize.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/Internalize.cpp Sun Jul 6 15:45:41 2008
@@ -28,47 +28,47 @@
STATISTIC(NumFunctions, "Number of functions internalized");
STATISTIC(NumGlobals , "Number of global vars internalized");
-namespace {
-
- // APIFile - A file which contains a list of symbols that should not be marked
- // external.
- cl::opt<std::string>
- APIFile("internalize-public-api-file", cl::value_desc("filename"),
- cl::desc("A file containing list of symbol names to preserve"));
-
- // APIList - A list of symbols that should not be marked internal.
- cl::list<std::string>
- APIList("internalize-public-api-list", cl::value_desc("list"),
- cl::desc("A list of symbol names to preserve"),
- cl::CommaSeparated);
+// APIFile - A file which contains a list of symbols that should not be marked
+// external.
+static cl::opt<std::string>
+APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
+
+// APIList - A list of symbols that should not be marked internal.
+static cl::list<std::string>
+APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"),
+ cl::CommaSeparated);
+namespace {
class VISIBILITY_HIDDEN InternalizePass : public ModulePass {
std::set<std::string> ExternalNames;
- bool DontInternalize;
+ /// If no API symbols were specified and a main function is defined,
+ /// assume the main function is the only API.
+ bool AllButMain;
public:
static char ID; // Pass identification, replacement for typeid
- explicit InternalizePass(bool InternalizeEverything = true);
+ explicit InternalizePass(bool AllButMain = true);
explicit InternalizePass(const std::vector <const char *>& exportList);
void LoadFile(const char *Filename);
virtual bool runOnModule(Module &M);
};
- char InternalizePass::ID = 0;
- RegisterPass<InternalizePass> X("internalize", "Internalize Global Symbols");
} // end anonymous namespace
-InternalizePass::InternalizePass(bool InternalizeEverything)
- : ModulePass((intptr_t)&ID), DontInternalize(false){
- if (!APIFile.empty()) // If a filename is specified, use it
+char InternalizePass::ID = 0;
+static RegisterPass<InternalizePass>
+X("internalize", "Internalize Global Symbols");
+
+InternalizePass::InternalizePass(bool AllButMain)
+ : ModulePass((intptr_t)&ID), AllButMain(AllButMain){
+ if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
- else if (!APIList.empty()) // Else, if a list is specified, use it.
+ if (!APIList.empty()) // If a list is specified, use it as well.
ExternalNames.insert(APIList.begin(), APIList.end());
- else if (!InternalizeEverything)
- // Finally, if we're allowed to, internalize all but main.
- DontInternalize = true;
}
InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
- : ModulePass((intptr_t)&ID), DontInternalize(false){
+ : ModulePass((intptr_t)&ID), AllButMain(false){
for(std::vector<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
@@ -79,8 +79,9 @@
// Load the APIFile...
std::ifstream In(Filename);
if (!In.good()) {
- cerr << "WARNING: Internalize couldn't load file '" << Filename << "'!\n";
- return; // Do not internalize anything...
+ cerr << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
}
while (In) {
std::string Symbol;
@@ -91,13 +92,14 @@
}
bool InternalizePass::runOnModule(Module &M) {
- if (DontInternalize) return false;
-
- // If no list or file of symbols was specified, check to see if there is a
- // "main" symbol defined in the module. If so, use it, otherwise do not
- // internalize the module, it must be a library or something.
- //
if (ExternalNames.empty()) {
+ // Return if we're not in 'all but main' mode and have no external API.
+ if (!AllButMain)
+ return false;
+ // If no list or file of symbols was specified, check to see if there is a
+ // "main" symbol defined in the module. If so, use it, otherwise do not
+ // internalize the module, it must be a library or something.
+ //
Function *MainFunc = M.getFunction("main");
if (MainFunc == 0 || MainFunc->isDeclaration())
return false; // No main found, must be a library...
@@ -108,7 +110,7 @@
bool Changed = false;
- // Found a main function, mark all functions not named main as internal.
+ // Mark all functions not in the API as internal.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isDeclaration() && // Function must be defined here
!I->hasInternalLinkage() && // Can't already have internal linkage
@@ -133,7 +135,8 @@
ExternalNames.insert("llvm.noinline");
ExternalNames.insert("llvm.global.annotations");
- // Mark all global variables with initializers as internal as well.
+ // Mark all global variables with initializers that are not in the API as
+ // internal as well.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
if (!I->isDeclaration() && !I->hasInternalLinkage() &&
@@ -147,8 +150,8 @@
return Changed;
}
-ModulePass *llvm::createInternalizePass(bool InternalizeEverything) {
- return new InternalizePass(InternalizeEverything);
+ModulePass *llvm::createInternalizePass(bool AllButMain) {
+ return new InternalizePass(AllButMain);
}
ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
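
Taken together, the constructor and runOnModule changes above give
InternalizePass this decision procedure: an explicit file or list of API
names always wins, and with no names at all the pass proceeds only in
'all but main' mode, and only when a defined main exists. A standalone
sketch of that logic (names and the boolean plumbing are illustrative):

  #include <cassert>
  #include <set>
  #include <string>

  // Returns false when the pass should do nothing at all; otherwise sets
  // Internalize for the queried symbol name.
  static bool runInternalize(std::set<std::string> ExternalNames,
                             bool AllButMain, bool HasDefinedMain,
                             const std::string &Sym, bool &Internalize) {
    if (ExternalNames.empty()) {
      if (!AllButMain || !HasDefinedMain)
        return false; // no API given, no main: probably a library
      ExternalNames.insert("main");
    }
    Internalize = ExternalNames.count(Sym) == 0;
    return true;
  }

  int main() {
    bool I = false;
    assert(runInternalize({}, true, true, "helper", I) && I);
    assert(runInternalize({}, true, true, "main", I) && !I);
    assert(!runInternalize({}, false, true, "helper", I)); // needs a list
    return 0;
  }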
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/LoopExtractor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/LoopExtractor.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/LoopExtractor.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/LoopExtractor.cpp Sun Jul 6 15:45:41 2008
@@ -52,22 +52,24 @@
AU.addRequired<LoopInfo>();
}
};
+}
- char LoopExtractor::ID = 0;
- RegisterPass<LoopExtractor>
- X("loop-extract", "Extract loops into new functions");
+char LoopExtractor::ID = 0;
+static RegisterPass<LoopExtractor>
+X("loop-extract", "Extract loops into new functions");
+namespace {
/// SingleLoopExtractor - For bugpoint.
struct SingleLoopExtractor : public LoopExtractor {
static char ID; // Pass identification, replacement for typeid
SingleLoopExtractor() : LoopExtractor(1) {}
};
-
- char SingleLoopExtractor::ID = 0;
- RegisterPass<SingleLoopExtractor>
- Y("loop-extract-single", "Extract at most one loop into a new function");
} // End anonymous namespace
+char SingleLoopExtractor::ID = 0;
+static RegisterPass<SingleLoopExtractor>
+Y("loop-extract-single", "Extract at most one loop into a new function");
+
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
//
@@ -146,14 +148,14 @@
}
-namespace {
- // BlockFile - A file which contains a list of blocks that should not be
- // extracted.
- cl::opt<std::string>
- BlockFile("extract-blocks-file", cl::value_desc("filename"),
- cl::desc("A file containing list of basic blocks to not extract"),
- cl::Hidden);
+// BlockFile - A file which contains a list of blocks that should not be
+// extracted.
+static cl::opt<std::string>
+BlockFile("extract-blocks-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of basic blocks to not extract"),
+ cl::Hidden);
+namespace {
/// BlockExtractorPass - This pass is used by bugpoint to extract all blocks
/// from the module into their own functions except for those specified by the
/// BlocksToNotExtract list.
@@ -173,12 +175,12 @@
bool runOnModule(Module &M);
};
-
- char BlockExtractorPass::ID = 0;
- RegisterPass<BlockExtractorPass>
- XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)");
}
+char BlockExtractorPass::ID = 0;
+static RegisterPass<BlockExtractorPass>
+XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)");
+
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
//
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/LowerSetJmp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/LowerSetJmp.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/LowerSetJmp.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/LowerSetJmp.cpp Sun Jul 6 15:45:41 2008
@@ -122,11 +122,11 @@
bool runOnModule(Module& M);
bool doInitialization(Module& M);
};
-
- char LowerSetJmp::ID = 0;
- RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump");
} // end anonymous namespace
+char LowerSetJmp::ID = 0;
+static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump");
+
// run - Run the transformation on the program. We grab the function
// prototypes for longjmp and setjmp. If they are used in the program,
// then we can go directly to their uses and transform them.
@@ -341,29 +341,25 @@
if (SwitchValMap[Func].first) return SwitchValMap[Func];
BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func);
- BasicBlock::InstListType& LongJmpPreIL = LongJmpPre->getInstList();
// Keep track of the preliminary basic block for some of the other
// transformations.
PrelimBBMap[Func] = LongJmpPre;
// Grab the exception.
- CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept");
- LongJmpPreIL.push_back(Cond);
+ CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre);
// The "decision basic block" gets the number associated with the
// setjmp call returning to switch on and the value returned by
// longjmp.
BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func);
- BasicBlock::InstListType& DecisionBBIL = DecisionBB->getInstList();
BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
// Fill in the "decision" basic block.
- CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal");
- DecisionBBIL.push_back(LJVal);
- CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum");
- DecisionBBIL.push_back(SJNum);
+ CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB);
+ CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum",
+ DecisionBB);
SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB);
return SwitchValMap[Func] = SwitchValuePair(SI, LJVal);
@@ -444,7 +440,7 @@
// Replace all uses of this instruction with the PHI node created by
// the eradication of setjmp.
Inst->replaceAllUsesWith(PHI);
- Inst->getParent()->getInstList().erase(Inst);
+ Inst->eraseFromParent();
++SetJmpsTransformed;
}
@@ -482,10 +478,10 @@
// Replace the old call inst with the invoke inst and remove the call.
CI.replaceAllUsesWith(II);
- CI.getParent()->getInstList().erase(&CI);
+ CI.eraseFromParent();
// The old terminator is useless now that we have the invoke inst.
- Term->getParent()->getInstList().erase(Term);
+ Term->eraseFromParent();
++CallsTransformed;
}
@@ -508,12 +504,11 @@
Function* Func = BB->getParent();
BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func);
- BasicBlock::InstListType& InstList = NewExceptBB->getInstList();
// If this is a longjmp exception, then branch to the preliminary BB of
// the longjmp exception handling. Otherwise, go to the old exception.
- CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept");
- InstList.push_back(IsLJExcept);
+ CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
+ NewExceptBB);
BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
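
The basic blocks being stitched together above implement a single dispatch
shape: first test whether the in-flight exception is a longjmp, then fetch
longjmp's value and switch on the setjmp number to pick a resume point,
rethrowing on no match. The same control flow in straight-line C++ with
stub hooks (the hook names echo the calls in the hunk; the bodies and the
values returned are made up):

  #include <cstdio>

  // Stubs standing in for the runtime helpers the pass calls into.
  static bool IsLJException() { return true; }
  static int  GetLJValue() { return 42; }
  static int  TryCatchLJ(void * /*SetJmpMap*/) { return 1; }

  int main() {
    void *SetJmpMap = nullptr;
    // LongJmpBlkPre: not a longjmp exception? Rethrow to the old handler.
    if (!IsLJException()) { std::puts("rethrow"); return 1; }
    // LJDecisionBB: grab longjmp's value, switch on the setjmp number.
    int LJVal = GetLJValue();
    switch (TryCatchLJ(SetJmpMap)) {
    case 1: std::printf("resume setjmp #1, value %d\n", LJVal); break;
    default: std::puts("rethrow"); break; // SwitchInst default: Rethrow
    }
    return 0;
  }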
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/PruneEH.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/PruneEH.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/PruneEH.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/PruneEH.cpp Sun Jul 6 15:45:41 2008
@@ -31,7 +31,6 @@
STATISTIC(NumRemoved, "Number of invokes removed");
STATISTIC(NumUnreach, "Number of noreturn calls optimized");
-STATISTIC(NumBBUnwind, "Number of unwind dest removed from blocks");
namespace {
struct VISIBILITY_HIDDEN PruneEH : public CallGraphSCCPass {
@@ -44,11 +43,12 @@
bool SimplifyFunction(Function *F);
void DeleteBasicBlock(BasicBlock *BB);
};
-
- char PruneEH::ID = 0;
- RegisterPass<PruneEH> X("prune-eh", "Remove unused exception handling info");
}
+char PruneEH::ID = 0;
+static RegisterPass<PruneEH>
+X("prune-eh", "Remove unused exception handling info");
+
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
@@ -64,6 +64,8 @@
// Next, check to see if any callees might throw or if there are any external
// functions in this SCC: if so, we cannot prune any functions in this SCC.
+ // Definitions that are weak and not declared non-throwing might be
+ // overridden at link time with something that throws, so assume that.
// If this SCC includes the unwind instruction, we KNOW it throws, so
// obviously the SCC might throw.
//
@@ -74,7 +76,7 @@
if (F == 0) {
SCCMightUnwind = true;
SCCMightReturn = true;
- } else if (F->isDeclaration()) {
+ } else if (F->isDeclaration() || F->hasWeakLinkage()) {
SCCMightUnwind |= !F->doesNotThrow();
SCCMightReturn |= !F->doesNotReturn();
} else {
@@ -152,8 +154,6 @@
bool PruneEH::SimplifyFunction(Function *F) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- bool couldUnwind = false;
-
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
if (II->doesNotThrow()) {
SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
@@ -183,12 +183,10 @@
++NumRemoved;
MadeChange = true;
- } else {
- couldUnwind = true;
}
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
- if (CallInst *CI = dyn_cast<CallInst>(I++)) {
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
// This call calls a function that cannot return. Insert an
// unreachable instruction after it and simplify the code. Do this
@@ -204,19 +202,9 @@
MadeChange = true;
++NumUnreach;
break;
- } else if (!CI->doesNotThrow()) {
- couldUnwind = true;
}
- }
-
- // Strip 'unwindTo' off of BBs that have no calls/invokes without nounwind.
- if (!couldUnwind && BB->getUnwindDest()) {
- MadeChange = true;
- ++NumBBUnwind;
- BB->getUnwindDest()->removePredecessor(BB, false, true);
- BB->setUnwindDest(NULL);
- }
}
+
return MadeChange;
}
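
The hasWeakLinkage check added above is about link-time semantics rather
than the visible IR: a weak definition can be displaced by a strong one from
another object file, so a nothrow-looking body proves nothing. The same
effect at the C++ level, using the GCC/Clang weak attribute (a sketch; the
symbol name is made up):

  // This definition may be replaced at link time by a strong definition
  // from another translation unit, and that one is free to throw.
  extern "C" __attribute__((weak)) void maybe_throws() {
    // Locally empty, but PruneEH-style reasoning must not trust this body.
  }

  int main() {
    maybe_throws();
    return 0;
  }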
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/RaiseAllocations.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/RaiseAllocations.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/RaiseAllocations.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/RaiseAllocations.cpp Sun Jul 6 15:45:41 2008
@@ -48,12 +48,11 @@
//
bool runOnModule(Module &M);
};
-
- char RaiseAllocations::ID = 0;
- RegisterPass<RaiseAllocations>
- X("raiseallocs", "Raise allocations from calls to instructions");
} // end anonymous namespace
+char RaiseAllocations::ID = 0;
+static RegisterPass<RaiseAllocations>
+X("raiseallocs", "Raise allocations from calls to instructions");
// createRaiseAllocationsPass - The interface to this file...
ModulePass *llvm::createRaiseAllocationsPass() {
@@ -165,7 +164,7 @@
// source size.
if (Source->getType() != Type::Int32Ty)
Source =
- CastInst::createIntegerCast(Source, Type::Int32Ty, false/*ZExt*/,
+ CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/,
"MallocAmtCast", I);
MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I);
@@ -178,7 +177,7 @@
BranchInst::Create(II->getNormalDest(), I);
// Delete the old call site
- MI->getParent()->getInstList().erase(I);
+ I->eraseFromParent();
Changed = true;
++NumRaised;
}
Removed: llvm/branches/non-call-eh/lib/Transforms/IPO/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/SimplifyLibCalls.cpp?rev=53162&view=auto
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/SimplifyLibCalls.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/SimplifyLibCalls.cpp (removed)
@@ -1,2085 +0,0 @@
-//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a module pass that applies a variety of small
-// optimizations for calls to specific well-known functions (e.g. runtime
-// library functions). For example, a call to the function "exit(3)" that
-// occurs within the main() function can be transformed into a simple "return 3"
-// instruction. Any optimization that takes this form (replace call to library
-// function with simpler code that provides the same result) belongs in this
-// file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "simplify-libcalls"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Config/config.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/IPO.h"
-#include <cstring>
-using namespace llvm;
-
-/// This statistic keeps track of the total number of library calls that have
-/// been simplified regardless of which call it is.
-STATISTIC(SimplifiedLibCalls, "Number of library calls simplified");
-
-namespace {
- // Forward declarations
- class LibCallOptimization;
- class SimplifyLibCalls;
-
-/// This list is populated by the constructor for LibCallOptimization class.
-/// Therefore all subclasses are registered here at static initialization time
-/// and this list is what the SimplifyLibCalls pass uses to apply the individual
-/// optimizations to the call sites.
-/// @brief The list of optimizations deriving from LibCallOptimization
-static LibCallOptimization *OptList = 0;
-
-/// This class is the abstract base class for the set of optimizations that
-/// corresponds to one library call. The SimplifyLibCalls pass will call the
-/// ValidateCalledFunction method to ask the optimization if a given Function
-/// is the kind that the optimization can handle. If the subclass returns true,
-/// then SimplifyLibCalls will also call the OptimizeCall method to perform,
-/// or attempt to perform, the optimization(s) for the library call. Otherwise,
-/// OptimizeCall won't be called. Subclasses are responsible for providing the
-/// name of the library call (strlen, strcpy, etc.) to the LibCallOptimization
-/// constructor. This is used to efficiently select which call instructions to
-/// optimize. The criterion for a "lib call" is "anything with well-known
-/// semantics", typically a library function that is defined by an international
-/// standard. Because the semantics are well known, the optimizations can
-/// generally short-circuit actually calling the function if there's a simpler
-/// way (e.g. strlen(X) can be reduced to a constant if X is a constant global).
-/// @brief Base class for library call optimizations
-class VISIBILITY_HIDDEN LibCallOptimization {
- LibCallOptimization **Prev, *Next;
- const char *FunctionName; ///< Name of the library call we optimize
-#ifndef NDEBUG
- Statistic occurrences; ///< debug statistic (-debug-only=simplify-libcalls)
-#endif
-public:
- /// The \p fname argument must be the name of the library function being
- /// optimized by the subclass.
- /// @brief Constructor that registers the optimization.
- LibCallOptimization(const char *FName, const char *Description)
- : FunctionName(FName) {
-
-#ifndef NDEBUG
- occurrences.construct("simplify-libcalls", Description);
-#endif
- // Register this optimizer in the list of optimizations.
- Next = OptList;
- OptList = this;
- Prev = &OptList;
- if (Next) Next->Prev = &Next;
- }
-
- /// getNext - All libcall optimizations are chained together into a list,
- /// return the next one in the list.
- LibCallOptimization *getNext() { return Next; }
-
- /// @brief Deregister from the optlist
- virtual ~LibCallOptimization() {
- *Prev = Next;
- if (Next) Next->Prev = Prev;
- }
-
- /// The implementation of this function in subclasses should determine if
- /// \p F is suitable for the optimization. This method is called by
- /// SimplifyLibCalls::runOnModule to short circuit visiting all the call
- /// sites of such a function if that function is not suitable in the first
- /// place. If the called function is suitable, this method should return
- /// true, and false otherwise. This function should also perform any lazy
- /// initialization that the LibCallOptimization needs to do, if it's to return
- /// true. This avoids doing initialization until the optimizer is actually
- /// going to be called upon to do some optimization.
- /// @brief Determine if the function is suitable for optimization
- virtual bool ValidateCalledFunction(
- const Function* F, ///< The function that is the target of call sites
- SimplifyLibCalls& SLC ///< The pass object invoking us
- ) = 0;
-
- /// The implementations of this function in subclasses is the heart of the
- /// SimplifyLibCalls algorithm. Subclasses of this class implement
- /// OptimizeCall to determine if (a) the conditions are right for optimizing
- /// the call and (b) to perform the optimization. If an action is taken
- /// against ci, the subclass is responsible for returning true and ensuring
- /// that ci is erased from its parent.
- /// @brief Optimize a call, if possible.
- virtual bool OptimizeCall(
- CallInst* ci, ///< The call instruction that should be optimized.
- SimplifyLibCalls& SLC ///< The pass object invoking us
- ) = 0;
-
- /// @brief Get the name of the library call being optimized
- const char *getFunctionName() const { return FunctionName; }
-
- bool ReplaceCallWith(CallInst *CI, Value *V) {
- if (!CI->use_empty())
- CI->replaceAllUsesWith(V);
- CI->eraseFromParent();
- return true;
- }
-
- /// @brief Called by SimplifyLibCalls to update the occurrences statistic.
- void succeeded() {
-#ifndef NDEBUG
- DEBUG(++occurrences);
-#endif
- }
-};
-
-/// This class is an LLVM Pass that applies each of the LibCallOptimization
-/// instances to all the call sites in a module, relatively efficiently. The
-/// purpose of this pass is to provide optimizations for calls to well-known
-/// functions with well-known semantics, such as those in the c library. The
-/// class provides the basic infrastructure for handling runOnModule. Whenever
-/// this pass finds a function call, it asks the appropriate optimizer to
-/// validate the call (ValidateLibraryCall). If it is validated, then
-/// the OptimizeCall method is also called.
-/// @brief A ModulePass for optimizing well-known function calls.
-class VISIBILITY_HIDDEN SimplifyLibCalls : public ModulePass {
-public:
- static char ID; // Pass identification, replacement for typeid
- SimplifyLibCalls() : ModulePass((intptr_t)&ID) {}
-
- /// We need some target data for accurate signature details that are
- /// target dependent. So we require target data in our AnalysisUsage.
- /// @brief Require TargetData from AnalysisUsage.
- virtual void getAnalysisUsage(AnalysisUsage& Info) const {
- // Ask that the TargetData analysis be performed before us so we can use
- // the target data.
- Info.addRequired<TargetData>();
- }
-
- /// For this pass, process all of the function calls in the module, calling
- /// ValidateLibraryCall and OptimizeCall as appropriate.
- /// @brief Run all the lib call optimizations on a Module.
- virtual bool runOnModule(Module &M) {
- reset(M);
-
- bool result = false;
- StringMap<LibCallOptimization*> OptznMap;
- for (LibCallOptimization *Optzn = OptList; Optzn; Optzn = Optzn->getNext())
- OptznMap[Optzn->getFunctionName()] = Optzn;
-
- // The call optimizations can be recursive. That is, the optimization might
- // generate a call to another function which can also be optimized. This way
- // we make the LibCallOptimization instances very specific to the case they
- // handle. It also means we need to keep running over the function calls in
- // the module until no further optimizations are possible.
- bool found_optimization = false;
- do {
- found_optimization = false;
- for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
- // All the "well-known" functions are external and have external linkage
- // because they live in a runtime library somewhere and were (probably)
- // not compiled by LLVM. So, we only act on external functions that
- // have external or dllimport linkage and non-empty uses.
- if (!FI->isDeclaration() ||
- !(FI->hasExternalLinkage() || FI->hasDLLImportLinkage()) ||
- FI->use_empty())
- continue;
-
- // Get the optimization class that pertains to this function
- StringMap<LibCallOptimization*>::iterator OMI =
- OptznMap.find(FI->getName());
- if (OMI == OptznMap.end()) continue;
-
- LibCallOptimization *CO = OMI->second;
-
- // Make sure the called function is suitable for the optimization
- if (!CO->ValidateCalledFunction(FI, *this))
- continue;
-
- // Loop over each of the uses of the function
- for (Value::use_iterator UI = FI->use_begin(), UE = FI->use_end();
- UI != UE ; ) {
- // If the use of the function is a call instruction
- if (CallInst* CI = dyn_cast<CallInst>(*UI++)) {
- // Do the optimization on the LibCallOptimization.
- if (CO->OptimizeCall(CI, *this)) {
- ++SimplifiedLibCalls;
- found_optimization = result = true;
- CO->succeeded();
- }
- }
- }
- }
- } while (found_optimization);
-
- return result;
- }
-
- /// @brief Return the *current* module we're working on.
- Module* getModule() const { return M; }
-
- /// @brief Return the *current* target data for the module we're working on.
- TargetData* getTargetData() const { return TD; }
-
- /// @brief Return the size_t type -- syntactic shortcut
- const Type* getIntPtrType() const { return TD->getIntPtrType(); }
-
- /// @brief Return a Function* for the putchar libcall
- Constant *get_putchar() {
- if (!putchar_func)
- putchar_func =
- M->getOrInsertFunction("putchar", Type::Int32Ty, Type::Int32Ty, NULL);
- return putchar_func;
- }
-
- /// @brief Return a Function* for the puts libcall
- Constant *get_puts() {
- if (!puts_func)
- puts_func = M->getOrInsertFunction("puts", Type::Int32Ty,
- PointerType::getUnqual(Type::Int8Ty),
- NULL);
- return puts_func;
- }
-
- /// @brief Return a Function* for the fputc libcall
- Constant *get_fputc(const Type* FILEptr_type) {
- if (!fputc_func)
- fputc_func = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
- FILEptr_type, NULL);
- return fputc_func;
- }
-
- /// @brief Return a Function* for the fputs libcall
- Constant *get_fputs(const Type* FILEptr_type) {
- if (!fputs_func)
- fputs_func = M->getOrInsertFunction("fputs", Type::Int32Ty,
- PointerType::getUnqual(Type::Int8Ty),
- FILEptr_type, NULL);
- return fputs_func;
- }
-
- /// @brief Return a Function* for the fwrite libcall
- Constant *get_fwrite(const Type* FILEptr_type) {
- if (!fwrite_func)
- fwrite_func = M->getOrInsertFunction("fwrite", TD->getIntPtrType(),
- PointerType::getUnqual(Type::Int8Ty),
- TD->getIntPtrType(),
- TD->getIntPtrType(),
- FILEptr_type, NULL);
- return fwrite_func;
- }
-
- /// @brief Return a Function* for the sqrt libcall
- Constant *get_sqrt() {
- if (!sqrt_func)
- sqrt_func = M->getOrInsertFunction("sqrt", Type::DoubleTy,
- Type::DoubleTy, NULL);
- return sqrt_func;
- }
-
- /// @brief Return a Function* for the strcpy libcall
- Constant *get_strcpy() {
- if (!strcpy_func)
- strcpy_func = M->getOrInsertFunction("strcpy",
- PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- NULL);
- return strcpy_func;
- }
-
- /// @brief Return a Function* for the strlen libcall
- Constant *get_strlen() {
- if (!strlen_func)
- strlen_func = M->getOrInsertFunction("strlen", TD->getIntPtrType(),
- PointerType::getUnqual(Type::Int8Ty),
- NULL);
- return strlen_func;
- }
-
- /// @brief Return a Function* for the memchr libcall
- Constant *get_memchr() {
- if (!memchr_func)
- memchr_func = M->getOrInsertFunction("memchr",
- PointerType::getUnqual(Type::Int8Ty),
- PointerType::getUnqual(Type::Int8Ty),
- Type::Int32Ty, TD->getIntPtrType(),
- NULL);
- return memchr_func;
- }
-
- /// @brief Return a Function* for the memcpy libcall
- Constant *get_memcpy() {
- if (!memcpy_func) {
- Intrinsic::ID IID = (TD->getIntPtrType() == Type::Int32Ty) ?
- Intrinsic::memcpy_i32 : Intrinsic::memcpy_i64;
- memcpy_func = Intrinsic::getDeclaration(M, IID);
- }
- return memcpy_func;
- }
-
- Constant *getUnaryFloatFunction(const char *BaseName, const Type *T = 0) {
- if (T == 0) T = Type::FloatTy;
-
- char NameBuffer[20];
- const char *Name;
- if (T == Type::DoubleTy)
- Name = BaseName; // floor
- else {
- Name = NameBuffer;
- unsigned NameLen = strlen(BaseName);
- assert(NameLen < sizeof(NameBuffer)-2 && "Buffer too small");
- memcpy(NameBuffer, BaseName, NameLen);
- if (T == Type::FloatTy)
- NameBuffer[NameLen] = 'f'; // floorf
- else
- NameBuffer[NameLen] = 'l'; // floorl
- NameBuffer[NameLen+1] = 0;
- }
-
- return M->getOrInsertFunction(Name, T, T, NULL);
- }
-
- Constant *get_floorf() { return getUnaryFloatFunction("floor"); }
- Constant *get_ceilf() { return getUnaryFloatFunction( "ceil"); }
- Constant *get_roundf() { return getUnaryFloatFunction("round"); }
- Constant *get_rintf() { return getUnaryFloatFunction( "rint"); }
- Constant *get_nearbyintf() { return getUnaryFloatFunction("nearbyint"); }
-
-
-
-private:
- /// @brief Reset our cached data for a new Module
- void reset(Module& mod) {
- M = &mod;
- TD = &getAnalysis<TargetData>();
- putchar_func = 0;
- puts_func = 0;
- fputc_func = 0;
- fputs_func = 0;
- fwrite_func = 0;
- memcpy_func = 0;
- memchr_func = 0;
- sqrt_func = 0;
- strcpy_func = 0;
- strlen_func = 0;
- }
-
-private:
- /// Caches for function pointers.
- Constant *putchar_func, *puts_func;
- Constant *fputc_func, *fputs_func, *fwrite_func;
- Constant *memcpy_func, *memchr_func;
- Constant *sqrt_func;
- Constant *strcpy_func, *strlen_func;
- Module *M; ///< Cached Module
- TargetData *TD; ///< Cached TargetData
-};
-
-char SimplifyLibCalls::ID = 0;
-// Register the pass
-RegisterPass<SimplifyLibCalls>
-X("simplify-libcalls", "Simplify well-known library calls");
-
-} // anonymous namespace
-
-// The only public symbol in this file which just instantiates the pass object
-ModulePass *llvm::createSimplifyLibCallsPass() {
- return new SimplifyLibCalls();
-}
-
-// Classes below here, in the anonymous namespace, are all subclasses of the
-// LibCallOptimization class, each implementing all optimizations possible for a
-// single well-known library call. Each has a static singleton instance that
-// auto registers it into the "optlist" global above.
-namespace {
-
-// Forward declare utility functions.
-static bool GetConstantStringInfo(Value *V, std::string &Str);
-static Value *CastToCStr(Value *V, Instruction *IP);
-
-/// This LibCallOptimization will find instances of a call to "exit" that occurs
-/// within the "main" function and change it to a simple "ret" instruction with
-/// the same value passed to the exit function. When this is done, it splits the
-/// basic block at the exit(3) call and deletes the call instruction.
-/// @brief Replace calls to exit in main with a simple return
-struct VISIBILITY_HIDDEN ExitInMainOptimization : public LibCallOptimization {
- ExitInMainOptimization() : LibCallOptimization("exit",
- "Number of 'exit' calls simplified") {}
-
- // Make sure the called function looks like exit (int argument, int return
- // type, external linkage, not varargs).
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- return F->arg_size() >= 1 && F->arg_begin()->getType()->isInteger();
- }
-
- virtual bool OptimizeCall(CallInst* ci, SimplifyLibCalls& SLC) {
- // To be careful, we check that the call to exit is coming from "main", that
- // main has external linkage, and the return type of main and the argument
- // to exit have the same type.
- Function *from = ci->getParent()->getParent();
- if (from->hasExternalLinkage())
- if (from->getReturnType() == ci->getOperand(1)->getType()
- && !isa<StructType>(from->getReturnType()))
- if (from->getName() == "main") {
- // Okay, time to actually do the optimization. First, get the basic
- // block of the call instruction
- BasicBlock* bb = ci->getParent();
-
- // Create a return instruction that we'll replace the call with.
- // Note that the argument of the return is the argument of the call
- // instruction.
- ReturnInst::Create(ci->getOperand(1), ci);
-
- // Split the block at the call instruction which places it in a new
- // basic block.
- bb->splitBasicBlock(ci);
-
- // The block split caused a branch instruction to be inserted into
- // the end of the original block, right after the return instruction
- // that we put there. That's not a valid block, so delete the branch
- // instruction.
- bb->getInstList().pop_back();
-
- // Now we can finally get rid of the call instruction which now lives
- // in the new basic block.
- ci->eraseFromParent();
-
- // Optimization succeeded, return true.
- return true;
- }
- // We didn't pass the criteria for this optimization so return false
- return false;
- }
-} ExitInMainOptimizer;
-
-/// This LibCallOptimization will simplify a call to the strcat library
-/// function. The simplification is possible only if the string being
-/// concatenated is a constant array or a constant expression that results in
-/// a constant string. In this case we can replace it with strlen + llvm.memcpy
-/// of the constant string. Both of these calls are further reduced, if possible,
-/// on subsequent passes.
-/// @brief Simplify the strcat library function.
-struct VISIBILITY_HIDDEN StrCatOptimization : public LibCallOptimization {
-public:
- /// @brief Default constructor
- StrCatOptimization() : LibCallOptimization("strcat",
- "Number of 'strcat' calls simplified") {}
-
-public:
-
- /// @brief Make sure that the "strcat" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 2 &&
- FT->getReturnType() == PointerType::getUnqual(Type::Int8Ty) &&
- FT->getParamType(0) == FT->getReturnType() &&
- FT->getParamType(1) == FT->getReturnType();
- }
-
- /// @brief Optimize the strcat library function
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // Extract some information from the instruction
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
-
- // Extract the initializer (while making numerous checks) from the
- // source operand of the call to strcat.
- std::string SrcStr;
- if (!GetConstantStringInfo(Src, SrcStr))
- return false;
-
- // Handle the simple, do-nothing case
- if (SrcStr.empty())
- return ReplaceCallWith(CI, Dst);
-
- // We need to find the end of the destination string. That's where the
- // memory is to be moved to. We just generate a call to strlen.
- CallInst *DstLen = CallInst::Create(SLC.get_strlen(), Dst,
- Dst->getName()+".len", CI);
-
- // Now that we have the destination's length, we must index into the
- // destination's pointer to get the actual memcpy destination (end of
- // the string we're concatenating).
- Dst = GetElementPtrInst::Create(Dst, DstLen, Dst->getName()+".indexed", CI);
-
- // We have enough information to now generate the memcpy call to
- // do the concatenation for us.
- Value *Vals[] = {
- Dst, Src,
- ConstantInt::get(SLC.getIntPtrType(), SrcStr.size()+1), // copy nul byte.
- ConstantInt::get(Type::Int32Ty, 1) // alignment
- };
- CallInst::Create(SLC.get_memcpy(), Vals, Vals + 4, "", CI);
-
- // strcat returns its first argument, not a pointer to the new end.
- return ReplaceCallWith(CI, CI->getOperand(1));
- }
-} StrCatOptimizer;
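
At the C level the rewrite above behaves like the following standalone sketch
(the helper name is illustrative, not part of the patch):

  #include <cassert>
  #include <cstring>

  // strcat(dst, "lit") -> strlen(dst), then memcpy of the literal plus its
  // nul byte to dst + len.
  static char *strcatViaMemcpy(char *dst, const char *lit, std::size_t litLen) {
    std::size_t dstLen = std::strlen(dst);      // the generated strlen call
    std::memcpy(dst + dstLen, lit, litLen + 1); // copies the nul byte too
    return dst;                                 // strcat returns dst
  }

  int main() {
    char a[16] = "foo", b[16] = "foo";
    std::strcat(a, "bar");
    strcatViaMemcpy(b, "bar", 3);
    assert(std::strcmp(a, b) == 0);
    return 0;
  }
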
-
-/// This LibCallOptimization will simplify a call to the strchr library
-/// function. It optimizes out cases where the arguments are both constant
-/// and the result can be determined statically.
-/// @brief Simplify the strchr library function.
-struct VISIBILITY_HIDDEN StrChrOptimization : public LibCallOptimization {
-public:
- StrChrOptimization() : LibCallOptimization("strchr",
- "Number of 'strchr' calls simplified") {}
-
- /// @brief Make sure that the "strchr" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 2 &&
- FT->getReturnType() == PointerType::getUnqual(Type::Int8Ty) &&
- FT->getParamType(0) == FT->getReturnType() &&
- isa<IntegerType>(FT->getParamType(1));
- }
-
- /// @brief Perform the strchr optimizations
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // Check that the first argument to strchr is a constant string.
- std::string Str;
- if (!GetConstantStringInfo(CI->getOperand(1), Str))
- return false;
-
- // If the second operand is not constant, just lower this to memchr since we
- // know the length of the input string.
- ConstantInt *CSI = dyn_cast<ConstantInt>(CI->getOperand(2));
- if (!CSI) {
- Value *Args[3] = {
- CI->getOperand(1),
- CI->getOperand(2),
- ConstantInt::get(SLC.getIntPtrType(), Str.size()+1)
- };
- return ReplaceCallWith(CI, CallInst::Create(SLC.get_memchr(), Args, Args + 3,
- CI->getName(), CI));
- }
-
- // strchr can find the nul character.
- Str += '\0';
-
- // Get the character we're looking for
- char CharValue = CSI->getSExtValue();
-
- // Compute the offset
- uint64_t i = 0;
- while (1) {
- if (i == Str.size()) // Didn't find the char. strchr returns null.
- return ReplaceCallWith(CI, Constant::getNullValue(CI->getType()));
- // Did we find our match?
- if (Str[i] == CharValue)
- break;
- ++i;
- }
-
- // strchr(s+n,c) -> gep(s+n+i,c)
- // (if c is a constant integer and s is a constant string)
- Value *Idx = ConstantInt::get(Type::Int64Ty, i);
- Value *GEP = GetElementPtrInst::Create(CI->getOperand(1), Idx,
- CI->getOperand(1)->getName() +
- ".strchr", CI);
- return ReplaceCallWith(CI, GEP);
- }
-} StrChrOptimizer;
-
-/// This LibCallOptimization will simplify a call to the strcmp library
-/// function. It optimizes out cases where one or both arguments are constant
-/// and the result can be determined statically.
-/// @brief Simplify the strcmp library function.
-struct VISIBILITY_HIDDEN StrCmpOptimization : public LibCallOptimization {
-public:
- StrCmpOptimization() : LibCallOptimization("strcmp",
- "Number of 'strcmp' calls simplified") {}
-
- /// @brief Make sure that the "strcmp" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getReturnType() == Type::Int32Ty && FT->getNumParams() == 2 &&
- FT->getParamType(0) == FT->getParamType(1) &&
- FT->getParamType(0) == PointerType::getUnqual(Type::Int8Ty);
- }
-
- /// @brief Perform the strcmp optimization
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // First, check to see if src and destination are the same. If they are,
- // then the optimization is to replace the CallInst with a constant 0
- // because the call is a no-op.
- Value *Str1P = CI->getOperand(1);
- Value *Str2P = CI->getOperand(2);
- if (Str1P == Str2P) // strcmp(x,x) -> 0
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0));
-
- std::string Str1;
- if (!GetConstantStringInfo(Str1P, Str1))
- return false;
- if (Str1.empty()) {
- // strcmp("", x) -> -*x (the result must carry the sign of -x[0])
- Value *V = new LoadInst(Str2P, CI->getName()+".load", CI);
- V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI);
- V = BinaryOperator::createNeg(V, CI->getName()+".neg", CI);
- return ReplaceCallWith(CI, V);
- }
-
- std::string Str2;
- if (!GetConstantStringInfo(Str2P, Str2))
- return false;
- if (Str2.empty()) {
- // strcmp(x,"") -> *x
- Value *V = new LoadInst(Str1P, CI->getName()+".load", CI);
- V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI);
- return ReplaceCallWith(CI, V);
- }
-
- // strcmp(x, y) -> cnst (if both x and y are constant strings)
- int R = strcmp(Str1.c_str(), Str2.c_str());
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), R));
- }
-} StrCmpOptimizer;
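
A minimal check of the identities this optimizer relies on (only the sign of a
strcmp result is significant):

  #include <cassert>
  #include <cstring>

  int main() {
    const char *x = "abc";
    assert(std::strcmp(x, x) == 0);                // strcmp(x,x) -> 0
    assert(std::strcmp("abc", "abd") < 0);         // both constant: folds away
    assert((std::strcmp(x, "") > 0) == (*x > 0));  // strcmp(x,"") -> *x
    assert((std::strcmp("", x) < 0) == (*x > 0));  // strcmp("",x) -> -*x
    return 0;
  }
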
-
-/// This LibCallOptimization will simplify a call to the strncmp library
-/// function. It optimizes out cases where one or both arguments are constant
-/// and the result can be determined statically.
-/// @brief Simplify the strncmp library function.
-struct VISIBILITY_HIDDEN StrNCmpOptimization : public LibCallOptimization {
-public:
- StrNCmpOptimization() : LibCallOptimization("strncmp",
- "Number of 'strncmp' calls simplified") {}
-
- /// @brief Make sure that the "strncmp" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getReturnType() == Type::Int32Ty && FT->getNumParams() == 3 &&
- FT->getParamType(0) == FT->getParamType(1) &&
- FT->getParamType(0) == PointerType::getUnqual(Type::Int8Ty) &&
- isa<IntegerType>(FT->getParamType(2));
- }
-
- /// @brief Perform the strncmp optimization
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // First, check to see if src and destination are the same. If they are,
- // then the optimization is to replace the CallInst with a constant 0
- // because the call is a no-op.
- Value *Str1P = CI->getOperand(1);
- Value *Str2P = CI->getOperand(2);
- if (Str1P == Str2P) // strncmp(x,x, n) -> 0
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0));
-
- // Check the length argument, if it is Constant zero then the strings are
- // considered equal.
- uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
- Length = LengthArg->getZExtValue();
- else
- return false;
-
- if (Length == 0) // strncmp(x,y,0) -> 0
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0));
-
- std::string Str1;
- if (!GetConstantStringInfo(Str1P, Str1))
- return false;
- if (Str1.empty()) {
- // strncmp("", x, n) -> -*x; n >= 1 here, so the first bytes decide.
- Value *V = new LoadInst(Str2P, CI->getName()+".load", CI);
- V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI);
- V = BinaryOperator::createNeg(V, CI->getName()+".neg", CI);
- return ReplaceCallWith(CI, V);
- }
-
- std::string Str2;
- if (!GetConstantStringInfo(Str2P, Str2))
- return false;
- if (Str2.empty()) {
- // strncmp(x, "", n) -> *x
- Value *V = new LoadInst(Str1P, CI->getName()+".load", CI);
- V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI);
- return ReplaceCallWith(CI, V);
- }
-
- // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
- int R = strncmp(Str1.c_str(), Str2.c_str(), Length);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), R));
- }
-} StrNCmpOptimizer;
-
-/// This LibCallOptimization will simplify a call to the strcpy library
-/// function. Two optimizations are possible:
-/// (1) If src and dest are the same and not volatile, just return dest
-/// (2) If the src is a constant then we can convert to llvm.memmove
-/// @brief Simplify the strcpy library function.
-struct VISIBILITY_HIDDEN StrCpyOptimization : public LibCallOptimization {
-public:
- StrCpyOptimization() : LibCallOptimization("strcpy",
- "Number of 'strcpy' calls simplified") {}
-
- /// @brief Make sure that the "strcpy" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 2 &&
- FT->getParamType(0) == FT->getParamType(1) &&
- FT->getReturnType() == FT->getParamType(0) &&
- FT->getParamType(0) == PointerType::getUnqual(Type::Int8Ty);
- }
-
- /// @brief Perform the strcpy optimization
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // First, check to see if src and destination are the same. If they are,
- // then the optimization is to replace the CallInst with the destination
- // because the call is a no-op. Note that this corresponds to the
- // degenerate strcpy(X,X) case which should have "undefined" results
- // according to the C specification. However, it occurs sometimes and
- // we optimize it as a no-op.
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
- if (Dst == Src) {
- // strcpy(x, x) -> x
- return ReplaceCallWith(CI, Dst);
- }
-
- // Get the length of the constant string referenced by the Src operand.
- std::string SrcStr;
- if (!GetConstantStringInfo(Src, SrcStr))
- return false;
-
- // If the constant string's length is zero we can optimize this by just
- // doing a store of 0 at the first byte of the destination
- if (SrcStr.empty()) {
- new StoreInst(ConstantInt::get(Type::Int8Ty, 0), Dst, CI);
- return ReplaceCallWith(CI, Dst);
- }
-
- // We have enough information to now generate the memcpy call to
- // do the concatenation for us.
- Value *MemcpyOps[] = {
- Dst, Src, // Pass length including nul byte.
- ConstantInt::get(SLC.getIntPtrType(), SrcStr.size()+1),
- ConstantInt::get(Type::Int32Ty, 1) // alignment
- };
- CallInst::Create(SLC.get_memcpy(), MemcpyOps, MemcpyOps + 4, "", CI);
-
- return ReplaceCallWith(CI, Dst);
- }
-} StrCpyOptimizer;
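
Both cases handled above can be sanity-checked at the C level with a short
sketch:

  #include <cassert>
  #include <cstring>

  int main() {
    // strcpy(dst, "lit") -> memcpy(dst, "lit", strlen("lit")+1): the +1
    // makes the nul terminator travel with the data.
    char a[8], b[8];
    std::strcpy(a, "hi");
    std::memcpy(b, "hi", std::strlen("hi") + 1);
    assert(std::strcmp(a, b) == 0);

    // strcpy(dst, "") -> a single store of 0 to dst[0].
    char c[8] = "xxxx";
    c[0] = 0;
    assert(std::strlen(c) == 0);
    return 0;
  }
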
-
-/// This LibCallOptimization will simplify a call to the strlen library
-/// function by replacing it with a constant value if the string provided to
-/// it is a constant array.
-/// @brief Simplify the strlen library function.
-struct VISIBILITY_HIDDEN StrLenOptimization : public LibCallOptimization {
- StrLenOptimization() : LibCallOptimization("strlen",
- "Number of 'strlen' calls simplified") {}
-
- /// @brief Make sure that the "strlen" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 1 &&
- FT->getParamType(0) == PointerType::getUnqual(Type::Int8Ty) &&
- isa<IntegerType>(FT->getReturnType());
- }
-
- /// @brief Perform the strlen optimization
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // Grab the string operand; the validated prototype guarantees an i8*.
- Value *Src = CI->getOperand(1);
-
- // Does the call to strlen have exactly one use?
- if (CI->hasOneUse()) {
- // Is that single use a icmp operator?
- if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CI->use_back()))
- // Is it compared against a constant integer?
- if (ConstantInt *Cst = dyn_cast<ConstantInt>(Cmp->getOperand(1))) {
- // If its compared against length 0 with == or !=
- if (Cst->getZExtValue() == 0 && Cmp->isEquality()) {
- // strlen(x) != 0 -> *x != 0
- // strlen(x) == 0 -> *x == 0
- Value *V = new LoadInst(Src, Src->getName()+".first", CI);
- V = new ICmpInst(Cmp->getPredicate(), V,
- ConstantInt::get(Type::Int8Ty, 0),
- Cmp->getName()+".strlen", CI);
- Cmp->replaceAllUsesWith(V);
- Cmp->eraseFromParent();
- return ReplaceCallWith(CI, 0); // no uses.
- }
- }
- }
-
- // Get the length of the constant string operand
- std::string Str;
- if (!GetConstantStringInfo(Src, Str))
- return false;
-
- // strlen("xyz") -> 3 (for example)
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), Str.size()));
- }
-} StrLenOptimizer;
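
The load+icmp rewrite rests on a simple identity, checked below:

  #include <cassert>
  #include <cstring>

  int main() {
    // strlen(x) == 0 iff the first byte is nul: exactly what the emitted
    // load + icmp against 0 tests.
    const char *empty = "", *word = "xyz";
    assert((std::strlen(empty) == 0) == (*empty == 0));
    assert((std::strlen(word) == 0) == (*word == 0));
    assert(std::strlen("xyz") == 3);   // constant string: folds to 3
    return 0;
  }
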
-
-/// IsOnlyUsedInEqualsZeroComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
-static bool IsOnlyUsedInEqualsZeroComparison(Instruction *I) {
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
- if (IC->isEquality())
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
-/// This memcmpOptimization will simplify a call to the memcmp library
-/// function.
-struct VISIBILITY_HIDDEN memcmpOptimization : public LibCallOptimization {
- /// @brief Default Constructor
- memcmpOptimization()
- : LibCallOptimization("memcmp", "Number of 'memcmp' calls simplified") {}
-
- /// @brief Make sure that the "memcmp" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &TD) {
- Function::const_arg_iterator AI = F->arg_begin();
- if (F->arg_size() != 3 || !isa<PointerType>(AI->getType())) return false;
- if (!isa<PointerType>((++AI)->getType())) return false;
- if (!(++AI)->getType()->isInteger()) return false;
- if (!F->getReturnType()->isInteger()) return false;
- return true;
- }
-
- /// Because of alignment and instruction information that we don't have, we
- /// leave the bulk of this to the code generators.
- ///
- /// Note that we could do much more if we could force alignment on otherwise
- /// small aligned allocas, or if we could indicate that loads have a small
- /// alignment.
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &TD) {
- Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
-
- // If the two operands are the same, return zero.
- if (LHS == RHS) {
- // memcmp(s,s,x) -> 0
- return ReplaceCallWith(CI, Constant::getNullValue(CI->getType()));
- }
-
- // Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
- if (!LenC) return false;
- uint64_t Len = LenC->getZExtValue();
-
- // If the length is zero, this returns 0.
- switch (Len) {
- case 0:
- // memcmp(s1,s2,0) -> 0
- return ReplaceCallWith(CI, Constant::getNullValue(CI->getType()));
- case 1: {
- // memcmp(S1,S2,1) -> *(ubyte*)S1 - *(ubyte*)S2
- const Type *UCharPtr = PointerType::getUnqual(Type::Int8Ty);
- CastInst *Op1Cast = CastInst::create(
- Instruction::BitCast, LHS, UCharPtr, LHS->getName(), CI);
- CastInst *Op2Cast = CastInst::create(
- Instruction::BitCast, RHS, UCharPtr, RHS->getName(), CI);
- Value *S1V = new LoadInst(Op1Cast, LHS->getName()+".val", CI);
- Value *S2V = new LoadInst(Op2Cast, RHS->getName()+".val", CI);
- Value *RV = BinaryOperator::createSub(S1V, S2V, CI->getName()+".diff",CI);
- if (RV->getType() != CI->getType())
- RV = CastInst::createIntegerCast(RV, CI->getType(), false,
- RV->getName(), CI);
- return ReplaceCallWith(CI, RV);
- }
- case 2:
- if (IsOnlyUsedInEqualsZeroComparison(CI)) {
- // TODO: IF both are aligned, use a short load/compare.
-
- // memcmp(S1,S2,2) -> S1[0]-S2[0] | S1[1]-S2[1] iff only ==/!= 0 matters
- const Type *UCharPtr = PointerType::getUnqual(Type::Int8Ty);
- CastInst *Op1Cast = CastInst::create(
- Instruction::BitCast, LHS, UCharPtr, LHS->getName(), CI);
- CastInst *Op2Cast = CastInst::create(
- Instruction::BitCast, RHS, UCharPtr, RHS->getName(), CI);
- Value *S1V1 = new LoadInst(Op1Cast, LHS->getName()+".val1", CI);
- Value *S2V1 = new LoadInst(Op2Cast, RHS->getName()+".val1", CI);
- Value *D1 = BinaryOperator::createSub(S1V1, S2V1,
- CI->getName()+".d1", CI);
- Constant *One = ConstantInt::get(Type::Int32Ty, 1);
- Value *G1 = GetElementPtrInst::Create(Op1Cast, One, "next1v", CI);
- Value *G2 = GetElementPtrInst::Create(Op2Cast, One, "next2v", CI);
- Value *S1V2 = new LoadInst(G1, LHS->getName()+".val2", CI);
- Value *S2V2 = new LoadInst(G2, RHS->getName()+".val2", CI);
- Value *D2 = BinaryOperator::createSub(S1V2, S2V2,
- CI->getName()+".d1", CI);
- Value *Or = BinaryOperator::createOr(D1, D2, CI->getName()+".res", CI);
- if (Or->getType() != CI->getType())
- Or = CastInst::createIntegerCast(Or, CI->getType(), false /*ZExt*/,
- Or->getName(), CI);
- return ReplaceCallWith(CI, Or);
- }
- break;
- default:
- break;
- }
-
- return false;
- }
-} memcmpOptimizer;
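
A small sketch of the byte-difference reductions above (sign equivalence is
all memcmp guarantees):

  #include <cassert>
  #include <cstring>

  // Length 1: memcmp reduces to an unsigned byte difference, matching the
  // two loads and the sub generated above.
  static int memcmp1(const unsigned char *a, const unsigned char *b) {
    return *a - *b;
  }

  int main() {
    unsigned char x = 'a', y = 'c';
    assert((memcmp1(&x, &y) < 0) == (std::memcmp(&x, &y, 1) < 0));
    assert(memcmp1(&x, &x) == 0);
    // Length 2, equality-only users: (a[0]-b[0]) | (a[1]-b[1]) is zero iff
    // both byte pairs match.
    unsigned char p[2] = {1, 2}, q[2] = {1, 3};
    assert((((p[0]-q[0]) | (p[1]-q[1])) == 0) == (std::memcmp(p, q, 2) == 0));
    return 0;
  }
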
-
-/// This LibCallOptimization will simplify a call to the memcpy library
-/// function. It simply converts them into calls to llvm.memcpy.*;
-/// the resulting call should be optimized later.
-/// @brief Simplify the memcpy library function.
-struct VISIBILITY_HIDDEN MemCpyOptimization : public LibCallOptimization {
-public:
- MemCpyOptimization() : LibCallOptimization("memcpy",
- "Number of 'memcpy' calls simplified") {}
-
- /// @brief Make sure that the "memcpy" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- const Type* voidPtr = PointerType::getUnqual(Type::Int8Ty);
- return FT->getReturnType() == voidPtr && FT->getNumParams() == 3 &&
- FT->getParamType(0) == voidPtr &&
- FT->getParamType(1) == voidPtr &&
- FT->getParamType(2) == SLC.getIntPtrType();
- }
-
- /// @brief Perform the memcpy optimization
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- Value *MemcpyOps[] = {
- CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
- ConstantInt::get(Type::Int32Ty, 1) // align = 1 always.
- };
- CallInst::Create(SLC.get_memcpy(), MemcpyOps, MemcpyOps + 4, "", CI);
- // memcpy always returns the destination
- return ReplaceCallWith(CI, CI->getOperand(1));
- }
-} MemCpyOptimizer;
-
-/// This LibCallOptimization will simplify a call to the llvm.memcpy or
-/// llvm.memmove intrinsic by expanding it out to a single load and store of
-/// 1, 2, 4, or 8 bytes, depending on the length and alignment arguments.
-/// Additional optimizations are possible in code generation (e.g. a sequence
-/// of immediate stores).
-/// @brief Simplify the llvm.memcpy and llvm.memmove intrinsics.
-struct VISIBILITY_HIDDEN LLVMMemCpyMoveOptzn : public LibCallOptimization {
- LLVMMemCpyMoveOptzn(const char* fname, const char* desc)
- : LibCallOptimization(fname, desc) {}
-
- /// @brief Make sure that the "memcpy" function has the right prototype
- virtual bool ValidateCalledFunction(const Function* f, SimplifyLibCalls& TD) {
- // Just make sure this has 4 arguments per LLVM spec.
- return (f->arg_size() == 4);
- }
-
- /// Because of alignment and instruction information that we don't have, we
- /// leave the bulk of this to the code generators. The optimization here just
- /// deals with a few degenerate cases where the length of the string and the
- /// alignment match the sizes of our intrinsic types so we can do a load and
- /// store instead of the memcpy call.
- /// @brief Perform the memcpy optimization.
- virtual bool OptimizeCall(CallInst* ci, SimplifyLibCalls& TD) {
- // Make sure we have constant int values to work with
- ConstantInt* LEN = dyn_cast<ConstantInt>(ci->getOperand(3));
- if (!LEN)
- return false;
- ConstantInt* ALIGN = dyn_cast<ConstantInt>(ci->getOperand(4));
- if (!ALIGN)
- return false;
-
- // If the length is larger than the alignment, we can't optimize
- uint64_t len = LEN->getZExtValue();
- uint64_t alignment = ALIGN->getZExtValue();
- if (alignment == 0)
- alignment = 1; // Alignment 0 is identity for alignment 1
- if (len > alignment)
- return false;
-
- // Get the type we will cast to, based on size of the string
- Value* dest = ci->getOperand(1);
- Value* src = ci->getOperand(2);
- const Type* castType = 0;
- switch (len) {
- case 0:
- // memcpy(d,s,0,a) -> noop (the intrinsic returns void)
- return ReplaceCallWith(ci, 0);
- case 1: castType = Type::Int8Ty; break;
- case 2: castType = Type::Int16Ty; break;
- case 4: castType = Type::Int32Ty; break;
- case 8: castType = Type::Int64Ty; break;
- default:
- return false;
- }
-
- // Cast source and dest to the right sized primitive and then load/store
- CastInst* SrcCast = CastInst::create(Instruction::BitCast,
- src, PointerType::getUnqual(castType), src->getName()+".cast", ci);
- CastInst* DestCast = CastInst::create(Instruction::BitCast,
- dest, PointerType::getUnqual(castType),dest->getName()+".cast", ci);
- LoadInst* LI = new LoadInst(SrcCast,SrcCast->getName()+".val",ci);
- new StoreInst(LI, DestCast, ci);
- return ReplaceCallWith(ci, 0);
- }
-};
-
-/// This LibCallOptimization will simplify a call to the memcpy/memmove library
-/// functions.
-LLVMMemCpyMoveOptzn LLVMMemCpyOptimizer32("llvm.memcpy.i32",
- "Number of 'llvm.memcpy' calls simplified");
-LLVMMemCpyMoveOptzn LLVMMemCpyOptimizer64("llvm.memcpy.i64",
- "Number of 'llvm.memcpy' calls simplified");
-LLVMMemCpyMoveOptzn LLVMMemMoveOptimizer32("llvm.memmove.i32",
- "Number of 'llvm.memmove' calls simplified");
-LLVMMemCpyMoveOptzn LLVMMemMoveOptimizer64("llvm.memmove.i64",
- "Number of 'llvm.memmove' calls simplified");
-
-/// This LibCallOptimization will simplify a call to the memset library
-/// function by expanding it out to a single store of size 0, 1, 2, 4, or 8
-/// bytes depending on the length argument.
-struct VISIBILITY_HIDDEN LLVMMemSetOptimization : public LibCallOptimization {
- /// @brief Default Constructor
- LLVMMemSetOptimization(const char *Name) : LibCallOptimization(Name,
- "Number of 'llvm.memset' calls simplified") {}
-
- /// @brief Make sure that the "memset" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &TD) {
- // Just make sure this has 4 arguments per LLVM spec.
- return F->arg_size() == 4;
- }
-
- /// Because of alignment and instruction information that we don't have, we
- /// leave the bulk of this to the code generators. The optimization here just
- /// deals with a few degenerate cases where the length parameter is constant
- /// and the alignment matches the sizes of our integer types, so we can do a
- /// single store instead of the memset call. All other calls are left alone.
- /// @brief Perform the memset optimization.
- virtual bool OptimizeCall(CallInst *ci, SimplifyLibCalls &TD) {
- // Make sure we have constant int values to work with
- ConstantInt* LEN = dyn_cast<ConstantInt>(ci->getOperand(3));
- if (!LEN)
- return false;
- ConstantInt* ALIGN = dyn_cast<ConstantInt>(ci->getOperand(4));
- if (!ALIGN)
- return false;
-
- // Extract the length and alignment
- uint64_t len = LEN->getZExtValue();
- uint64_t alignment = ALIGN->getZExtValue();
-
- // Alignment 0 is identity for alignment 1
- if (alignment == 0)
- alignment = 1;
-
- // If the length is zero, this is a no-op
- if (len == 0) {
- // memset(d,c,0,a) -> noop
- return ReplaceCallWith(ci, 0);
- }
-
- // If the length is larger than the alignment, we can't optimize
- if (len > alignment)
- return false;
-
- // Make sure we have a constant ubyte to work with so we can extract
- // the value to be filled.
- ConstantInt* FILL = dyn_cast<ConstantInt>(ci->getOperand(2));
- if (!FILL)
- return false;
- if (FILL->getType() != Type::Int8Ty)
- return false;
-
- // memset(s,c,n) -> store c, s (for n=1,2,4,8)
-
- // Extract the fill character
- uint64_t fill_char = FILL->getZExtValue();
- uint64_t fill_value = fill_char;
-
- // Get the type we will cast to, based on the size of the memory area to
- // fill, and the value we will store there.
- Value* dest = ci->getOperand(1);
- const Type* castType = 0;
- switch (len) {
- case 1:
- castType = Type::Int8Ty;
- break;
- case 2:
- castType = Type::Int16Ty;
- fill_value |= fill_char << 8;
- break;
- case 4:
- castType = Type::Int32Ty;
- fill_value |= fill_char << 8 | fill_char << 16 | fill_char << 24;
- break;
- case 8:
- castType = Type::Int64Ty;
- fill_value |= fill_char << 8 | fill_char << 16 | fill_char << 24;
- fill_value |= fill_char << 32 | fill_char << 40 | fill_char << 48;
- fill_value |= fill_char << 56;
- break;
- default:
- return false;
- }
-
- // Cast dest to the right sized primitive and then load/store
- CastInst* DestCast = new BitCastInst(dest, PointerType::getUnqual(castType),
- dest->getName()+".cast", ci);
- new StoreInst(ConstantInt::get(castType,fill_value),DestCast, ci);
- return ReplaceCallWith(ci, 0);
- }
-};
-
-LLVMMemSetOptimization MemSet32Optimizer("llvm.memset.i32");
-LLVMMemSetOptimization MemSet64Optimizer("llvm.memset.i64");
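
The fill_value construction in the switch above is ordinary byte replication;
a standalone sketch (the helper name is illustrative):

  #include <cassert>
  #include <cstdint>

  // Replicate the fill byte across the store width, as the switch builds
  // fill_value from fill_char.
  static uint64_t replicateByte(uint64_t fillChar, unsigned len) {
    uint64_t v = fillChar;
    for (unsigned i = 1; i != len; ++i)
      v |= fillChar << (8 * i);
    return v;
  }

  int main() {
    assert(replicateByte(0xAB, 1) == 0xABu);
    assert(replicateByte(0xAB, 2) == 0xABABu);
    assert(replicateByte(0xAB, 4) == 0xABABABABu);
    assert(replicateByte(0xAB, 8) == 0xABABABABABABABABull);
    return 0;
  }
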
-
-
-/// This LibCallOptimization will simplify calls to the "pow" library
-/// function. It looks for cases where the result of pow is well known and
-/// substitutes the appropriate value.
-/// @brief Simplify the pow library function.
-struct VISIBILITY_HIDDEN PowOptimization : public LibCallOptimization {
-public:
- /// @brief Default Constructor
- PowOptimization(const char *Name) : LibCallOptimization(Name,
- "Number of 'pow' calls simplified") {}
-
- /// @brief Make sure that the "pow" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 2 &&
- FT->getParamType(0) == FT->getParamType(1) &&
- FT->getParamType(0) == FT->getReturnType() &&
- FT->getParamType(0)->isFloatingPoint();
- }
-
- /// @brief Perform the pow optimization.
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- Value *Op1 = CI->getOperand(1);
- Value *Op2 = CI->getOperand(2);
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
- return ReplaceCallWith(CI, Op1C);
- if (Op1C->isExactlyValue(2.0)) {// pow(2.0, x) -> exp2(x)
- Value *Exp2 = SLC.getUnaryFloatFunction("exp2", CI->getType());
- Value *Res = CallInst::Create(Exp2, Op2, CI->getName()+"exp2", CI);
- return ReplaceCallWith(CI, Res);
- }
- }
-
- ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return false;
-
- if (Op2C->getValueAPF().isZero()) {
- // pow(x, 0.0) -> 1.0
- return ReplaceCallWith(CI, ConstantFP::get(CI->getType(), 1.0));
- } else if (Op2C->isExactlyValue(0.5)) {
- // FIXME: This is not safe for -0.0 and -inf. This can only be done when
- // 'unsafe' math optimizations are allowed.
- // x pow(x, 0.5) sqrt(x)
- // ---------------------------------------------
- // -0.0 +0.0 -0.0
- // -inf +inf NaN
-#if 0
- // pow(x, 0.5) -> sqrt(x)
- Value *Sqrt = CallInst::Create(SLC.get_sqrt(), Op1, "sqrt", CI);
- return ReplaceCallWith(CI, Sqrt);
-#endif
- } else if (Op2C->isExactlyValue(1.0)) {
- // pow(x, 1.0) -> x
- return ReplaceCallWith(CI, Op1);
- } else if (Op2C->isExactlyValue(2.0)) {
- // pow(x, 2.0) -> x*x
- Value *Sq = BinaryOperator::createMul(Op1, Op1, "pow2", CI);
- return ReplaceCallWith(CI, Sq);
- } else if (Op2C->isExactlyValue(-1.0)) {
- // pow(x, -1.0) -> 1.0/x
- Value *R = BinaryOperator::createFDiv(ConstantFP::get(CI->getType(), 1.0),
- Op1, CI->getName()+".pow", CI);
- return ReplaceCallWith(CI, R);
- }
- return false; // opt failed
- }
-};
-
-PowOptimization PowFOptimizer("powf");
-PowOptimization PowOptimizer("pow");
-PowOptimization PowLOptimizer("powl");
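
The folds above can be exercised with exactly representable inputs; note that
pow(x, 0.5) is intentionally disabled per the table (a sketch; std::exp2 is
C++11):

  #include <cassert>
  #include <cmath>

  int main() {
    double x = 4.0;                           // exactly representable
    assert(std::pow(1.0, x) == 1.0);          // pow(1.0, x) -> 1.0
    assert(std::pow(x, 0.0) == 1.0);          // pow(x, 0.0) -> 1.0
    assert(std::pow(x, 1.0) == x);            // pow(x, 1.0) -> x
    assert(std::pow(x, 2.0) == x * x);        // pow(x, 2.0) -> x*x
    assert(std::pow(x, -1.0) == 1.0 / x);     // pow(x, -1.0) -> 1.0/x
    assert(std::pow(2.0, x) == std::exp2(x)); // pow(2.0, x) -> exp2(x)
    return 0;
  }
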
-
-
-/// This LibCallOptimization will simplify calls to the "printf" library
-/// function. It looks for cases where the result of printf is not used and the
-/// operation can be reduced to something simpler.
-/// @brief Simplify the printf library function.
-struct VISIBILITY_HIDDEN PrintfOptimization : public LibCallOptimization {
-public:
- /// @brief Default Constructor
- PrintfOptimization() : LibCallOptimization("printf",
- "Number of 'printf' calls simplified") {}
-
- /// @brief Make sure that the "printf" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- // Just make sure this has at least 1 argument and returns an integer or
- // void type.
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() >= 1 &&
- (isa<IntegerType>(FT->getReturnType()) ||
- FT->getReturnType() == Type::VoidTy);
- }
-
- /// @brief Perform the printf optimization.
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // All the optimizations depend on the length of the first argument and the
- // fact that it is a constant string array. Check that now
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
- return false;
-
- // An empty format string prints nothing, so the call reduces to just its
- // return value: zero characters written.
- if (FormatStr.empty()) {
- // Tolerate printf's declared void.
- if (CI->use_empty()) return ReplaceCallWith(CI, 0);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0));
- }
-
- if (FormatStr.size() == 1) {
- // Turn this into a putchar call, even if it is a %.
- Value *V = ConstantInt::get(Type::Int32Ty, FormatStr[0]);
- CallInst::Create(SLC.get_putchar(), V, "", CI);
- if (CI->use_empty()) return ReplaceCallWith(CI, 0);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1));
- }
-
- // Check to see if the format str is something like "foo\n", in which case
- // we convert it to a puts call. We don't allow it to contain any format
- // characters.
- if (FormatStr[FormatStr.size()-1] == '\n' &&
- FormatStr.find('%') == std::string::npos) {
- // Create a string literal with no \n on it. We expect the constant merge
- // pass to be run after this pass, to merge duplicate strings.
- FormatStr.erase(FormatStr.end()-1);
- Constant *Init = ConstantArray::get(FormatStr, true);
- Constant *GV = new GlobalVariable(Init->getType(), true,
- GlobalVariable::InternalLinkage,
- Init, "str",
- CI->getParent()->getParent()->getParent());
- // Cast GV to be a pointer to char.
- GV = ConstantExpr::getBitCast(GV, PointerType::getUnqual(Type::Int8Ty));
- CallInst::Create(SLC.get_puts(), GV, "", CI);
-
- if (CI->use_empty()) return ReplaceCallWith(CI, 0);
- // The return value from printf includes the \n we just removed, so +1.
- return ReplaceCallWith(CI,
- ConstantInt::get(CI->getType(),
- FormatStr.size()+1));
- }
-
-
- // Only support %c or "%s\n" for now.
- if (FormatStr.size() < 2 || FormatStr[0] != '%')
- return false;
-
- // Get the second character and switch on its value
- switch (FormatStr[1]) {
- default: return false;
- case 's':
- if (FormatStr != "%s\n" || CI->getNumOperands() < 3 ||
- // TODO: could insert strlen call to compute string length.
- !CI->use_empty())
- return false;
-
- // printf("%s\n",str) -> puts(str)
- CallInst::Create(SLC.get_puts(), CastToCStr(CI->getOperand(2), CI),
- CI->getName(), CI);
- return ReplaceCallWith(CI, 0);
- case 'c': {
- // printf("%c",c) -> putchar(c)
- if (FormatStr.size() != 2 || CI->getNumOperands() < 3)
- return false;
-
- Value *V = CI->getOperand(2);
- if (!isa<IntegerType>(V->getType()) ||
- cast<IntegerType>(V->getType())->getBitWidth() > 32)
- return false;
-
- V = CastInst::createZExtOrBitCast(V, Type::Int32Ty, CI->getName()+".int",
- CI);
- CallInst::Create(SLC.get_putchar(), V, "", CI);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1));
- }
- }
- }
-} PrintfOptimizer;
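
The puts/putchar rewrites preserve the output bytes and, for the puts case,
the character count; a quick observable check:

  #include <cassert>
  #include <cstdio>

  int main() {
    // puts appends the newline the rewrite strips from the format string;
    // the original printf's return value counts that newline, hence size()+1.
    int n = std::printf("foo\n");
    assert(n == 4);
    std::puts("foo");           // same output as the printf above
    std::printf("%c", 'x');     // printf("%c",c) -> putchar(c)
    std::putchar('x');          // same single character as the line above
    std::putchar('\n');
    return 0;
  }
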
-
-/// This LibCallOptimization will simplify calls to the "fprintf" library
-/// function. It looks for cases where the result of fprintf is not used and the
-/// operation can be reduced to something simpler.
-/// @brief Simplify the fprintf library function.
-struct VISIBILITY_HIDDEN FPrintFOptimization : public LibCallOptimization {
-public:
- /// @brief Default Constructor
- FPrintFOptimization() : LibCallOptimization("fprintf",
- "Number of 'fprintf' calls simplified") {}
-
- /// @brief Make sure that the "fprintf" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 2 && // two fixed arguments.
- FT->getParamType(1) == PointerType::getUnqual(Type::Int8Ty) &&
- isa<PointerType>(FT->getParamType(0)) &&
- isa<IntegerType>(FT->getReturnType());
- }
-
- /// @brief Perform the fprintf optimization.
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // Only calls with at most one data argument (3 or 4 operands, counting
- // the callee) are handled.
- if (CI->getNumOperands() != 3 && CI->getNumOperands() != 4)
- return false;
-
- // All the optimizations depend on the format string.
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
- return false;
-
- // If this is just a format string, turn it into fwrite.
- if (CI->getNumOperands() == 3) {
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%')
- return false; // we found a format specifier
-
- // fprintf(file,fmt) -> fwrite(fmt,strlen(fmt),1,file)
- const Type *FILEty = CI->getOperand(1)->getType();
-
- Value *FWriteArgs[] = {
- CI->getOperand(2),
- ConstantInt::get(SLC.getIntPtrType(), FormatStr.size()),
- ConstantInt::get(SLC.getIntPtrType(), 1),
- CI->getOperand(1)
- };
- CallInst::Create(SLC.get_fwrite(FILEty), FWriteArgs, FWriteArgs + 4,
- CI->getName(), CI);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(),
- FormatStr.size()));
- }
-
- // The remaining optimizations require the format string to be length 2:
- // "%s" or "%c".
- if (FormatStr.size() != 2 || FormatStr[0] != '%')
- return false;
-
- // Get the second character and switch on its value
- switch (FormatStr[1]) {
- case 'c': {
- // fprintf(file,"%c",c) -> fputc(c,file)
- const Type *FILETy = CI->getOperand(1)->getType();
- Value *C = CastInst::createZExtOrBitCast(CI->getOperand(3), Type::Int32Ty,
- CI->getName()+".int", CI);
- SmallVector<Value *, 2> Args;
- Args.push_back(C);
- Args.push_back(CI->getOperand(1));
- CallInst::Create(SLC.get_fputc(FILETy), Args.begin(), Args.end(), "", CI);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1));
- }
- case 's': {
- const Type *FILETy = CI->getOperand(1)->getType();
-
- // If the result of the fprintf call is used, we can't do this.
- // TODO: we should insert a strlen call.
- if (!CI->use_empty())
- return false;
-
- // fprintf(file,"%s",str) -> fputs(str,file)
- SmallVector<Value *, 2> Args;
- Args.push_back(CastToCStr(CI->getOperand(3), CI));
- Args.push_back(CI->getOperand(1));
- CallInst::Create(SLC.get_fputs(FILETy), Args.begin(),
- Args.end(), CI->getName(), CI);
- return ReplaceCallWith(CI, 0);
- }
- default:
- return false;
- }
- }
-} FPrintFOptimizer;
-
-/// This LibCallOptimization will simplify calls to the "sprintf" library
-/// function. It looks for cases where the result of sprintf is not used and the
-/// operation can be reduced to something simpler.
-/// @brief Simplify the sprintf library function.
-struct VISIBILITY_HIDDEN SPrintFOptimization : public LibCallOptimization {
-public:
- /// @brief Default Constructor
- SPrintFOptimization() : LibCallOptimization("sprintf",
- "Number of 'sprintf' calls simplified") {}
-
- /// @brief Make sure that the "sprintf" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 2 && // two fixed arguments.
- FT->getParamType(1) == PointerType::getUnqual(Type::Int8Ty) &&
- FT->getParamType(0) == FT->getParamType(1) &&
- isa<IntegerType>(FT->getReturnType());
- }
-
- /// @brief Perform the sprintf optimization.
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // Only calls with at most one data argument (3 or 4 operands, counting
- // the callee) are handled.
- if (CI->getNumOperands() != 3 && CI->getNumOperands() != 4)
- return false;
-
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
- return false;
-
- if (CI->getNumOperands() == 3) {
- // Make sure there's no % in the constant array
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%')
- return false; // we found a format specifier
-
- // sprintf(str,fmt) -> llvm.memcpy(str,fmt,strlen(fmt)+1,1)
- Value *MemCpyArgs[] = {
- CI->getOperand(1), CI->getOperand(2),
- ConstantInt::get(SLC.getIntPtrType(),
- FormatStr.size()+1), // Copy the nul byte.
- ConstantInt::get(Type::Int32Ty, 1)
- };
- CallInst::Create(SLC.get_memcpy(), MemCpyArgs, MemCpyArgs + 4, "", CI);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(),
- FormatStr.size()));
- }
-
- // The remaining optimizations require the format string to be "%s" or "%c".
- if (FormatStr.size() != 2 || FormatStr[0] != '%')
- return false;
-
- // Get the second character and switch on its value
- switch (FormatStr[1]) {
- case 'c': {
- // sprintf(dest,"%c",chr) -> store chr, dest
- Value *V = CastInst::createTruncOrBitCast(CI->getOperand(3),
- Type::Int8Ty, "char", CI);
- new StoreInst(V, CI->getOperand(1), CI);
- Value *Ptr = GetElementPtrInst::Create(CI->getOperand(1),
- ConstantInt::get(Type::Int32Ty, 1),
- CI->getOperand(1)->getName()+".end",
- CI);
- new StoreInst(ConstantInt::get(Type::Int8Ty,0), Ptr, CI);
- return ReplaceCallWith(CI, ConstantInt::get(Type::Int32Ty, 1));
- }
- case 's': {
- // sprintf(dest,"%s",str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- Value *Len = CallInst::Create(SLC.get_strlen(),
- CastToCStr(CI->getOperand(3), CI),
- CI->getOperand(3)->getName()+".len", CI);
- Value *UnincLen = Len;
- Len = BinaryOperator::createAdd(Len, ConstantInt::get(Len->getType(), 1),
- Len->getName()+"1", CI);
- Value *MemcpyArgs[4] = {
- CI->getOperand(1),
- CastToCStr(CI->getOperand(3), CI),
- Len,
- ConstantInt::get(Type::Int32Ty, 1)
- };
- CallInst::Create(SLC.get_memcpy(), MemcpyArgs, MemcpyArgs + 4, "", CI);
-
- // The strlen result is the unincremented number of bytes in the string.
- if (!CI->use_empty()) {
- if (UnincLen->getType() != CI->getType())
- UnincLen = CastInst::createIntegerCast(UnincLen, CI->getType(), false,
- Len->getName(), CI);
- CI->replaceAllUsesWith(UnincLen);
- }
- return ReplaceCallWith(CI, 0);
- }
- }
- return false;
- }
-} SPrintFOptimizer;
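
The "%c" case is literally two stores plus a constant return value of 1:

  #include <cassert>
  #include <cstdio>
  #include <cstring>

  int main() {
    // sprintf(dst,"%c",chr) -> store the character, then store a nul byte.
    char viaSprintf[4], viaStores[4];
    int n = std::sprintf(viaSprintf, "%c", 'Q');
    viaStores[0] = 'Q';   // store chr, dest
    viaStores[1] = 0;     // store 0, dest+1
    assert(n == 1);       // the constant 1 the rewrite returns
    assert(std::strcmp(viaSprintf, viaStores) == 0);
    return 0;
  }
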
-
-/// This LibCallOptimization will simplify calls to the "fputs" library
-/// function. It looks for cases where the result of fputs is not used and the
-/// operation can be reduced to something simpler.
-/// @brief Simplify the fputs library function.
-struct VISIBILITY_HIDDEN FPutsOptimization : public LibCallOptimization {
-public:
- /// @brief Default Constructor
- FPutsOptimization() : LibCallOptimization("fputs",
- "Number of 'fputs' calls simplified") {}
-
- /// @brief Make sure that the "fputs" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- // Just make sure this has 2 arguments
- return F->arg_size() == 2;
- }
-
- /// @brief Perform the fputs optimization.
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // If the result is used, none of these optimizations work.
- if (!CI->use_empty())
- return false;
-
- // All the optimizations depend on the length of the first argument and the
- // fact that it is a constant string array. Check that now
- std::string Str;
- if (!GetConstantStringInfo(CI->getOperand(1), Str))
- return false;
-
- const Type *FILETy = CI->getOperand(2)->getType();
- // fputs(s,F) -> fwrite(s,len,1,F) (if s is a constant string)
- Value *FWriteParms[4] = {
- CI->getOperand(1),
- ConstantInt::get(SLC.getIntPtrType(), Str.size()),
- ConstantInt::get(SLC.getIntPtrType(), 1),
- CI->getOperand(2)
- };
- CallInst::Create(SLC.get_fwrite(FILETy), FWriteParms, FWriteParms + 4,
- "", CI);
- return ReplaceCallWith(CI, 0); // Known to have no uses (see above).
- }
-} FPutsOptimizer;
-
-/// This LibCallOptimization will simplify calls to the "fwrite" function.
-struct VISIBILITY_HIDDEN FWriteOptimization : public LibCallOptimization {
-public:
- /// @brief Default Constructor
- FWriteOptimization() : LibCallOptimization("fwrite",
- "Number of 'fwrite' calls simplified") {}
-
- /// @brief Make sure that the "fputs" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- const FunctionType *FT = F->getFunctionType();
- return FT->getNumParams() == 4 &&
- FT->getParamType(0) == PointerType::getUnqual(Type::Int8Ty) &&
- FT->getParamType(1) == FT->getParamType(2) &&
- isa<IntegerType>(FT->getParamType(1)) &&
- isa<PointerType>(FT->getParamType(3)) &&
- isa<IntegerType>(FT->getReturnType());
- }
-
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // Get the element size and count.
- uint64_t EltSize, EltCount;
- if (ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(2)))
- EltSize = C->getZExtValue();
- else
- return false;
- if (ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(3)))
- EltCount = C->getZExtValue();
- else
- return false;
-
- // If this is writing zero records, remove the call (it's a noop).
- if (EltSize * EltCount == 0)
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0));
-
- // If this is writing one byte, turn it into fputc.
- if (EltSize == 1 && EltCount == 1) {
- SmallVector<Value *, 2> Args;
- // fwrite(s,1,1,F) -> fputc(s[0],F)
- Value *Ptr = CI->getOperand(1);
- Value *Val = new LoadInst(Ptr, Ptr->getName()+".byte", CI);
- Args.push_back(new ZExtInst(Val, Type::Int32Ty, Val->getName()+".int", CI));
- Args.push_back(CI->getOperand(4));
- const Type *FILETy = CI->getOperand(4)->getType();
- CallInst::Create(SLC.get_fputc(FILETy), Args.begin(), Args.end(), "", CI);
- return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1));
- }
- return false;
- }
-} FWriteOptimizer;
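
The single-byte case is equivalent to fputc, as this sketch shows:

  #include <cstdio>

  int main() {
    // fwrite(p,1,1,F) writes exactly one byte, the same effect as
    // fputc(p[0],F); size*count == 0 is a no-op that returns 0.
    const char byte = 'z';
    std::fwrite(&byte, 1, 1, stdout);
    std::fputc(byte, stdout);   // same single byte as the fwrite above
    std::fputc('\n', stdout);
    return 0;
  }
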
-
-/// This LibCallOptimization will simplify calls to the "isdigit" library
-/// function. It simply range-checks the parameter explicitly.
-/// @brief Simplify the isdigit library function.
-struct VISIBILITY_HIDDEN isdigitOptimization : public LibCallOptimization {
-public:
- isdigitOptimization() : LibCallOptimization("isdigit",
- "Number of 'isdigit' calls simplified") {}
-
- /// @brief Make sure that the "isdigit" function has the right prototype
- virtual bool ValidateCalledFunction(const Function* f, SimplifyLibCalls& SLC){
- // Just make sure this has 1 argument
- return (f->arg_size() == 1);
- }
-
- /// @brief Perform the isdigit optimization.
- virtual bool OptimizeCall(CallInst *ci, SimplifyLibCalls &SLC) {
- if (ConstantInt* CI = dyn_cast<ConstantInt>(ci->getOperand(1))) {
- // isdigit(c) -> 0 or 1, if 'c' is constant
- uint64_t val = CI->getZExtValue();
- if (val >= '0' && val <= '9')
- return ReplaceCallWith(ci, ConstantInt::get(Type::Int32Ty, 1));
- else
- return ReplaceCallWith(ci, ConstantInt::get(Type::Int32Ty, 0));
- }
-
- // isdigit(c) -> (unsigned)c - '0' <= 9
- CastInst* cast = CastInst::createIntegerCast(ci->getOperand(1),
- Type::Int32Ty, false/*ZExt*/, ci->getOperand(1)->getName()+".uint", ci);
- BinaryOperator* sub_inst = BinaryOperator::createSub(cast,
- ConstantInt::get(Type::Int32Ty,0x30),
- ci->getOperand(1)->getName()+".sub",ci);
- ICmpInst* setcond_inst = new ICmpInst(ICmpInst::ICMP_ULE,sub_inst,
- ConstantInt::get(Type::Int32Ty,9),
- ci->getOperand(1)->getName()+".cmp",ci);
- CastInst* c2 = new ZExtInst(setcond_inst, Type::Int32Ty,
- ci->getOperand(1)->getName()+".isdigit", ci);
- return ReplaceCallWith(ci, c2);
- }
-} isdigitOptimizer;
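
The sub+icmp sequence uses the classic unsigned wrap-around trick to fuse two
range checks into one; verified exhaustively below:

  #include <cassert>

  // isdigit(c) <=> (unsigned)(c - '0') <= 9: the unsigned subtract wraps
  // values below '0' to huge numbers, so one compare covers both bounds.
  static bool isDigitViaSub(unsigned c) {
    return (c - 0x30u) <= 9u;   // matches the emitted sub + icmp ule
  }

  int main() {
    for (unsigned c = 0; c != 256; ++c)
      assert(isDigitViaSub(c) == (c >= '0' && c <= '9'));
    return 0;
  }
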
-
-struct VISIBILITY_HIDDEN isasciiOptimization : public LibCallOptimization {
-public:
- isasciiOptimization()
- : LibCallOptimization("isascii", "Number of 'isascii' calls simplified") {}
-
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- return F->arg_size() == 1 && F->arg_begin()->getType()->isInteger() &&
- F->getReturnType()->isInteger();
- }
-
- /// @brief Perform the isascii optimization.
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
- // isascii(c) -> (unsigned)c < 128
- Value *V = CI->getOperand(1);
- Value *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, V,
- ConstantInt::get(V->getType(), 128),
- V->getName()+".isascii", CI);
- if (Cmp->getType() != CI->getType())
- Cmp = new ZExtInst(Cmp, CI->getType(), Cmp->getName(), CI);
- return ReplaceCallWith(CI, Cmp);
- }
-} isasciiOptimizer;
-
-
-/// This LibCallOptimization will simplify calls to the "toascii" library
-/// function. It simply performs the corresponding AND operation to restrict
-/// the range of values to the ASCII character set (0-127).
-/// @brief Simplify the toascii library function.
-struct VISIBILITY_HIDDEN ToAsciiOptimization : public LibCallOptimization {
-public:
- /// @brief Default Constructor
- ToAsciiOptimization() : LibCallOptimization("toascii",
- "Number of 'toascii' calls simplified") {}
-
- /// @brief Make sure that the "fputs" function has the right prototype
- virtual bool ValidateCalledFunction(const Function* f, SimplifyLibCalls& SLC){
- // Just make sure this has 2 arguments
- return (f->arg_size() == 1);
- }
-
- /// @brief Perform the toascii optimization.
- virtual bool OptimizeCall(CallInst *ci, SimplifyLibCalls &SLC) {
- // toascii(c) -> (c & 0x7f)
- Value *chr = ci->getOperand(1);
- Value *and_inst = BinaryOperator::createAnd(chr,
- ConstantInt::get(chr->getType(),0x7F),ci->getName()+".toascii",ci);
- return ReplaceCallWith(ci, and_inst);
- }
-} ToAsciiOptimizer;
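
Both character-class rewrites reduce to a single compare or mask:

  #include <cassert>

  int main() {
    // isascii(c) -> (unsigned)c < 128, and toascii(c) -> c & 0x7f.
    for (unsigned c = 0; c != 512; ++c) {
      assert((c < 128u) == ((c & ~0x7Fu) == 0));  // the emitted icmp ult
      assert((c & 0x7Fu) == (c % 128u));          // the emitted and
    }
    return 0;
  }
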
-
-/// This LibCallOptimization will simplify calls to the "ffs" library
-/// calls which find the first set bit in an int, long, or long long. The
-/// optimization is to compute the result at compile time if the argument is
-/// a constant.
-/// @brief Simplify the ffs library function.
-struct VISIBILITY_HIDDEN FFSOptimization : public LibCallOptimization {
-protected:
- /// @brief Subclass Constructor
- FFSOptimization(const char* funcName, const char* description)
- : LibCallOptimization(funcName, description) {}
-
-public:
- /// @brief Default Constructor
- FFSOptimization() : LibCallOptimization("ffs",
- "Number of 'ffs' calls simplified") {}
-
- /// @brief Make sure that the "ffs" function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- // Just make sure this has 1 argument and returns a 32-bit integer
- return F->arg_size() == 1 && F->getReturnType() == Type::Int32Ty;
- }
-
- /// @brief Perform the ffs optimization.
- virtual bool OptimizeCall(CallInst *TheCall, SimplifyLibCalls &SLC) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(TheCall->getOperand(1))) {
- // ffs(cnst) -> bit#
- // ffsl(cnst) -> bit#
- // ffsll(cnst) -> bit#
- uint64_t val = CI->getZExtValue();
- int result = 0;
- if (val) {
- ++result;
- while ((val & 1) == 0) {
- ++result;
- val >>= 1;
- }
- }
- return ReplaceCallWith(TheCall, ConstantInt::get(Type::Int32Ty, result));
- }
-
- // ffs(x) -> x == 0 ? 0 : llvm.cttz(x)+1
- // ffsl(x) -> x == 0 ? 0 : llvm.cttz(x)+1
- // ffsll(x) -> x == 0 ? 0 : llvm.cttz(x)+1
- const Type *ArgType = TheCall->getOperand(1)->getType();
- assert(ArgType->getTypeID() == Type::IntegerTyID &&
- "llvm.cttz argument is not an integer?");
- Constant *F = Intrinsic::getDeclaration(SLC.getModule(),
- Intrinsic::cttz, &ArgType, 1);
-
- Value *V = CastInst::createIntegerCast(TheCall->getOperand(1), ArgType,
- false/*ZExt*/, "tmp", TheCall);
- Value *V2 = CallInst::Create(F, V, "tmp", TheCall);
- V2 = CastInst::createIntegerCast(V2, Type::Int32Ty, false/*ZExt*/,
- "tmp", TheCall);
- V2 = BinaryOperator::createAdd(V2, ConstantInt::get(Type::Int32Ty, 1),
- "tmp", TheCall);
- Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, V,
- Constant::getNullValue(V->getType()), "tmp",
- TheCall);
- V2 = SelectInst::Create(Cond, ConstantInt::get(Type::Int32Ty, 0), V2,
- TheCall->getName(), TheCall);
- return ReplaceCallWith(TheCall, V2);
- }
-} FFSOptimizer;
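
The icmp/select pair the pass builds matches this reference implementation
(the loop mirrors the constant-folding path above):

  #include <cassert>
  #include <cstdint>

  // ffs(x) == (x == 0) ? 0 : cttz(x) + 1; ffs is 1-based while cttz counts
  // trailing zero bits.
  static int ffsViaCttz(uint32_t x) {
    if (x == 0) return 0;                      // the icmp eq + select arm
    int bit = 1;
    while ((x & 1) == 0) { ++bit; x >>= 1; }   // trailing-zero count, 1-based
    return bit;
  }

  int main() {
    assert(ffsViaCttz(0) == 0);
    assert(ffsViaCttz(1) == 1);
    assert(ffsViaCttz(8) == 4);
    assert(ffsViaCttz(0x80000000u) == 32);
    return 0;
  }
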
-
-/// This LibCallOptimization will simplify calls to the "ffsl" library
-/// calls. It simply uses FFSOptimization for which the transformation is
-/// identical.
-/// @brief Simplify the ffsl library function.
-struct VISIBILITY_HIDDEN FFSLOptimization : public FFSOptimization {
-public:
- /// @brief Default Constructor
- FFSLOptimization() : FFSOptimization("ffsl",
- "Number of 'ffsl' calls simplified") {}
-
-} FFSLOptimizer;
-
-/// This LibCallOptimization will simplify calls to the "ffsll" library
-/// calls. It simply uses FFSOptimization for which the transformation is
-/// identical.
-/// @brief Simplify the ffsl library function.
-struct VISIBILITY_HIDDEN FFSLLOptimization : public FFSOptimization {
-public:
- /// @brief Default Constructor
- FFSLLOptimization() : FFSOptimization("ffsll",
- "Number of 'ffsll' calls simplified") {}
-
-} FFSLLOptimizer;
-
-/// This optimizes unary functions that take and return doubles.
-struct UnaryDoubleFPOptimizer : public LibCallOptimization {
- UnaryDoubleFPOptimizer(const char *Fn, const char *Desc)
- : LibCallOptimization(Fn, Desc) {}
-
- // Make sure that this function has the right prototype
- virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
- return F->arg_size() == 1 && F->arg_begin()->getType() == Type::DoubleTy &&
- F->getReturnType() == Type::DoubleTy;
- }
-
- /// ShrinkFunctionToFloatVersion - If the input to this function is really a
- /// float, strength reduce this to a float version of the function,
- /// e.g. floor((double)FLT) -> (double)floorf(FLT). This can only be called
- /// when the target supports the destination function and where there can be
- /// no precision loss.
- static bool ShrinkFunctionToFloatVersion(CallInst *CI, SimplifyLibCalls &SLC,
- Constant *(SimplifyLibCalls::*FP)()){
- if (FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)))
- if (Cast->getOperand(0)->getType() == Type::FloatTy) {
- Value *New = CallInst::Create((SLC.*FP)(), Cast->getOperand(0),
- CI->getName(), CI);
- New = new FPExtInst(New, Type::DoubleTy, CI->getName(), CI);
- CI->replaceAllUsesWith(New);
- CI->eraseFromParent();
- if (Cast->use_empty())
- Cast->eraseFromParent();
- return true;
- }
- return false;
- }
-};
-
-
-struct VISIBILITY_HIDDEN FloorOptimization : public UnaryDoubleFPOptimizer {
- FloorOptimization()
- : UnaryDoubleFPOptimizer("floor", "Number of 'floor' calls simplified") {}
-
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
-#ifdef HAVE_FLOORF
- // If this is a float argument passed in, convert to floorf.
- if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_floorf))
- return true;
-#endif
- return false; // opt failed
- }
-} FloorOptimizer;
-
-struct VISIBILITY_HIDDEN CeilOptimization : public UnaryDoubleFPOptimizer {
- CeilOptimization()
- : UnaryDoubleFPOptimizer("ceil", "Number of 'ceil' calls simplified") {}
-
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
-#ifdef HAVE_CEILF
- // If this is a float argument passed in, convert to ceilf.
- if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_ceilf))
- return true;
-#endif
- return false; // opt failed
- }
-} CeilOptimizer;
-
-struct VISIBILITY_HIDDEN RoundOptimization : public UnaryDoubleFPOptimizer {
- RoundOptimization()
- : UnaryDoubleFPOptimizer("round", "Number of 'round' calls simplified") {}
-
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
-#ifdef HAVE_ROUNDF
- // If this is a float argument passed in, convert to roundf.
- if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_roundf))
- return true;
-#endif
- return false; // opt failed
- }
-} RoundOptimizer;
-
-struct VISIBILITY_HIDDEN RintOptimization : public UnaryDoubleFPOptimizer {
- RintOptimization()
- : UnaryDoubleFPOptimizer("rint", "Number of 'rint' calls simplified") {}
-
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
-#ifdef HAVE_RINTF
- // If this is a float argument passed in, convert to rintf.
- if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_rintf))
- return true;
-#endif
- return false; // opt failed
- }
-} RintOptimizer;
-
-struct VISIBILITY_HIDDEN NearByIntOptimization : public UnaryDoubleFPOptimizer {
- NearByIntOptimization()
- : UnaryDoubleFPOptimizer("nearbyint",
- "Number of 'nearbyint' calls simplified") {}
-
- virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
-#ifdef HAVE_NEARBYINTF
- // If this is a float argument passed in, convert to nearbyintf.
- if (ShrinkFunctionToFloatVersion(CI, SLC,&SimplifyLibCalls::get_nearbyintf))
- return true;
-#endif
- return false; // opt failed
- }
-} NearByIntOptimizer;
-
-/// GetConstantStringInfo - This function computes the contents of a
-/// null-terminated constant array of integers. This function can't rely on the
-/// size of the constant array because there could be a null terminator in the
-/// middle of the array.
-///
-/// We also have to bail out if we find a non-integer constant initializer
-/// of one of the elements or if there is no null-terminator. The logic
-/// below checks each of these conditions and will return true only if all
-/// conditions are met. If the conditions aren't met, this returns false.
-///
-/// If successful, the \p Str parameter is set to the null-terminated string
-/// pointed to by V (without the terminator) and true is returned.
-static bool GetConstantStringInfo(Value *V, std::string &Str) {
- // Look through noop bitcast instructions.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
- if (BCI->getType() == BCI->getOperand(0)->getType())
- return GetConstantStringInfo(BCI->getOperand(0), Str);
- return false;
- }
-
- // If the value is not a GEP instruction nor a constant expression with a
- // GEP instruction, then return false because ConstantArray can't occur
- // any other way
- User *GEP = 0;
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
- GEP = GEPI;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() != Instruction::GetElementPtr)
- return false;
- GEP = CE;
- } else {
- return false;
- }
-
- // Make sure the GEP has exactly three arguments.
- if (GEP->getNumOperands() != 3)
- return false;
-
- // Check to make sure that the first operand of the GEP is an integer and
- // has value 0 so that we are sure we're indexing into the initializer.
- if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
- if (!Idx->isZero())
- return false;
- } else
- return false;
-
- // If the second index isn't a ConstantInt, then this is a variable index
- // into the array. If this occurs, we can't say anything meaningful about
- // the string.
- uint64_t StartIdx = 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
- StartIdx = CI->getZExtValue();
- else
- return false;
-
- // The GEP instruction, constant or instruction, must reference a global
- // variable that is a constant and is initialized. The referenced constant
- // initializer is the array that we'll use for optimization.
- GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasInitializer())
- return false;
- Constant *GlobalInit = GV->getInitializer();
-
- // Handle the ConstantAggregateZero case
- if (isa<ConstantAggregateZero>(GlobalInit)) {
- // This is a degenerate case. The initializer is constant zero so the
- // length of the string must be zero.
- Str.clear();
- return true;
- }
-
- // Must be a Constant Array
- ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (!Array) return false;
-
- // Get the number of elements in the array
- uint64_t NumElts = Array->getType()->getNumElements();
-
- // Traverse the constant array from StartIdx (derived above) which is
- // the place the GEP refers to in the array.
- for (unsigned i = StartIdx; i < NumElts; ++i) {
- Constant *Elt = Array->getOperand(i);
- ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI) // This array isn't suitable, non-int initializer.
- return false;
- if (CI->isZero())
- return true; // we found end of string, success!
- Str += (char)CI->getZExtValue();
- }
-
- return false; // The array isn't null terminated.
-}
-
-/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*,
-/// inserting the cast before IP, and return the cast.
-/// @brief Cast a value to a "C" string.
-static Value *CastToCStr(Value *V, Instruction *IP) {
- assert(isa<PointerType>(V->getType()) &&
- "Can't cast non-pointer type to C string type");
- const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty);
- if (V->getType() != SBPTy)
- return new BitCastInst(V, SBPTy, V->getName(), IP);
- return V;
-}
-
-// TODO:
-// Additional cases that we need to add to this file:
-//
-// cbrt:
-// * cbrt(expN(X)) -> expN(x/3)
-// * cbrt(sqrt(x)) -> pow(x,1/6)
-// * cbrt(cbrt(x)) -> pow(x,1/9)
-//
-// cos, cosf, cosl:
-// * cos(-x) -> cos(x)
-//
-// exp, expf, expl:
-// * exp(log(x)) -> x
-//
-// log, logf, logl:
-// * log(exp(x)) -> x
-// * log(x**y) -> y*log(x)
-// * log(exp(y)) -> y*log(e)
-// * log(exp2(y)) -> y*log(2)
-// * log(exp10(y)) -> y*log(10)
-// * log(sqrt(x)) -> 0.5*log(x)
-// * log(pow(x,y)) -> y*log(x)
-//
-// lround, lroundf, lroundl:
-// * lround(cnst) -> cnst'
-//
-// memcmp:
-// * memcmp(x,y,l) -> cnst
-// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
-//
-// memmove:
-// * memmove(d,s,l,a) -> memcpy(d,s,l,a)
-// (if s is a global constant array)
-//
-// pow, powf, powl:
-// * pow(exp(x),y) -> exp(x*y)
-// * pow(sqrt(x),y) -> pow(x,y*0.5)
-// * pow(pow(x,y),z)-> pow(x,y*z)
-//
-// puts:
-// * puts("") -> putchar("\n")
-//
-// round, roundf, roundl:
-// * round(cnst) -> cnst'
-//
-// signbit:
-// * signbit(cnst) -> cnst'
-// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
-//
-// sqrt, sqrtf, sqrtl:
-// * sqrt(expN(x)) -> expN(x*0.5)
-// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
-// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
-//
-// stpcpy:
-// * stpcpy(str, "literal") ->
-// llvm.memcpy(str,"literal",strlen("literal")+1,1)
-// strrchr:
-// * strrchr(s,c) -> reverse_offset_of_in(c,s)
-// (if c is a constant integer and s is a constant string)
-// * strrchr(s1,0) -> strchr(s1,0)
-//
-// strncat:
-// * strncat(x,y,0) -> x
-// * strncat(x,y,0) -> x (if strlen(y) = 0)
-// * strncat(x,y,l) -> strcat(x,y) (if y and l are constants an l > strlen(y))
-//
-// strncpy:
-// * strncpy(d,s,0) -> d
-// * strncpy(d,s,l) -> memcpy(d,s,l,1)
-// (if s and l are constants)
-//
-// strpbrk:
-// * strpbrk(s,a) -> offset_in_for(s,a)
-// (if s and a are both constant strings)
-// * strpbrk(s,"") -> 0
-// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
-//
-// strspn, strcspn:
-// * strspn(s,a) -> const_int (if both args are constant)
-// * strspn("",a) -> 0
-// * strspn(s,"") -> 0
-// * strcspn(s,a) -> const_int (if both args are constant)
-// * strcspn("",a) -> 0
-// * strcspn(s,"") -> strlen(a)
-//
-// strstr:
-// * strstr(x,x) -> x
-// * strstr(s1,s2) -> offset_of_s2_in(s1)
-// (if s1 and s2 are constant strings)
-//
-// tan, tanf, tanl:
-// * tan(atan(x)) -> x
-//
-// trunc, truncf, truncl:
-// * trunc(cnst) -> cnst'
-//
-//
-}
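
The deletions above remove the libcall simplifier's helper for recovering the
constant string behind a pointer (a getelementptr with a leading zero index
into a constant global array, or a ConstantAggregateZero for the empty
string), together with the TODO list of folds that recovery enables. A
source-level illustration of the payoff; the function and names here are
hypothetical:

  #include <string.h>

  static const char msg[] = "hello";     // constant global array
  unsigned live_bytes(void) {
    return strlen(msg + 1);              // GEP (msg, 0, 1) yields "ello"
  }                                      // folds to 'return 4'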
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/StripDeadPrototypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/StripDeadPrototypes.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/StripDeadPrototypes.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/StripDeadPrototypes.cpp Sun Jul 6 15:45:41 2008
@@ -1,4 +1,4 @@
-//===-- StripDeadPrototypes.cpp - Removed unused function declarations ----===//
+//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,7 +8,9 @@
//===----------------------------------------------------------------------===//
//
// This pass loops over all of the functions in the input module, looking for
-// dead declarations and removes them.
+// dead declarations and removing them. Dead declarations are declarations of
+// functions for which no implementation is available (e.g., declarations of
+// unused library functions).
//
//===----------------------------------------------------------------------===//
@@ -32,12 +34,12 @@
virtual bool runOnModule(Module &M);
};
-char StripDeadPrototypesPass::ID = 0;
-RegisterPass<StripDeadPrototypesPass> X("strip-dead-prototypes",
- "Strip Unused Function Prototypes");
-
} // end anonymous namespace
+char StripDeadPrototypesPass::ID = 0;
+static RegisterPass<StripDeadPrototypesPass>
+X("strip-dead-prototypes", "Strip Unused Function Prototypes");
+
bool StripDeadPrototypesPass::runOnModule(Module &M) {
bool MadeChange = false;
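
This hunk, and several like it below, moves the pass ID definition and the
RegisterPass object out of the anonymous namespace to file scope, making the
registration object's internal linkage explicit via 'static'. A minimal
sketch of the resulting idiom; the pass name, flag, and description strings
are illustrative:

  #include "llvm/Pass.h"
  #include "llvm/Module.h"
  using namespace llvm;

  namespace {
    struct MyPass : public ModulePass {
      static char ID;                            // pass identification
      MyPass() : ModulePass((intptr_t)&ID) {}
      virtual bool runOnModule(Module &M) { return false; }
    };
  } // end anonymous namespace

  char MyPass::ID = 0;                           // definitions now live at
  static RegisterPass<MyPass>                    // file scope, outside the
  X("my-pass", "An illustrative pass");          // anonymous namespace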
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/StripSymbols.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/StripSymbols.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/StripSymbols.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/StripSymbols.cpp Sun Jul 6 15:45:41 2008
@@ -46,11 +46,12 @@
AU.setPreservesAll();
}
};
-
- char StripSymbols::ID = 0;
- RegisterPass<StripSymbols> X("strip", "Strip all symbols from a module");
}
+char StripSymbols::ID = 0;
+static RegisterPass<StripSymbols>
+X("strip", "Strip all symbols from a module");
+
ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
return new StripSymbols(OnlyDebugInfo);
}
Modified: llvm/branches/non-call-eh/lib/Transforms/IPO/StructRetPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/IPO/StructRetPromotion.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/IPO/StructRetPromotion.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/IPO/StructRetPromotion.cpp Sun Jul 6 15:45:41 2008
@@ -1,4 +1,4 @@
-//===-- StructRetPromotion.cpp - Promote sret arguments -000000------------===//
+//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// TODO : Describe this pass.
+// This pass finds functions that return a struct (using a pointer to the struct
+// as the first argument of the function, marked with the 'sret' attribute) and
+// replaces them with a new function that simply returns each of the elements of
+// that struct (using multiple return values).
+//
+// This pass works under a number of conditions:
+// 1. The returned struct must not contain other structs
+// 2. The returned struct must only be used to load values from
+// 3. The placeholder struct passed in is the result of an alloca
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sretpromotion"
@@ -49,12 +58,12 @@
void updateCallSites(Function *F, Function *NF);
bool nestedStructType(const StructType *STy);
};
-
- char SRETPromotion::ID = 0;
- RegisterPass<SRETPromotion> X("sretpromotion",
- "Promote sret arguments to multiple ret values");
}
+char SRETPromotion::ID = 0;
+static RegisterPass<SRETPromotion>
+X("sretpromotion", "Promote sret arguments to multiple ret values");
+
Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
}
@@ -73,7 +82,7 @@
bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
Function *F = CGN->getFunction();
- if (!F || F->isDeclaration())
+ if (!F || F->isDeclaration() || !F->hasInternalLinkage())
return false;
// Make sure that function returns struct.
@@ -83,7 +92,7 @@
assert (F->getReturnType() == Type::VoidTy && "Invalid function return type");
Function::arg_iterator AI = F->arg_begin();
const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
- assert (FArgType && "Invalid sret paramater type");
+ assert (FArgType && "Invalid sret parameter type");
const llvm::StructType *STy =
dyn_cast<StructType>(FArgType->getElementType());
assert (STy && "Invalid sret parameter element type");
@@ -140,7 +149,7 @@
return true;
}
- // Check if it is ok to perform this promotion.
+// Check if it is ok to perform this promotion.
bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
if (F->use_empty())
@@ -149,9 +158,17 @@
for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end();
FnUseI != FnUseE; ++FnUseI) {
+    // The function is passed as an argument to (possibly) another function;
+    // we can't change it!
+ if (FnUseI.getOperandNo() != 0)
+ return false;
CallSite CS = CallSite::get(*FnUseI);
Instruction *Call = CS.getInstruction();
+    // The function is used by something other than a call or invoke
+    // instruction; we can't change it!
+ if (!Call)
+ return false;
CallSite::arg_iterator AI = CS.arg_begin();
Value *FirstArg = *AI;
@@ -223,7 +240,7 @@
FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg());
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
- NF->setCallingConv(F->getCallingConv());
+ NF->copyAttributesFrom(F);
NF->setParamAttrs(PAListPtr::get(ParamAttrsVec.begin(), ParamAttrsVec.end()));
F->getParent()->getFunctionList().insert(F, NF);
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
@@ -330,7 +347,7 @@
unsigned Num = STy->getNumElements();
for (unsigned i = 0; i < Num; i++) {
const Type *Ty = STy->getElementType(i);
- if (!Ty->isFirstClassType() && Ty != Type::VoidTy)
+ if (!Ty->isSingleValueType() && Ty != Type::VoidTy)
return true;
}
return false;
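
In source terms, the promotion this pass performs reshapes a function like
the following; the struct and names are illustrative:

  struct S { int a; float b; };

  // sret form emitted by the front-end:    after promotion (conceptually):
  //   void f(struct S *sret out);            f returns the two elements as
  //   struct S tmp;                          multiple return values, and
  //   f(&tmp); use(tmp.a);                   call sites read them back with
  //                                          getresult instructions.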
Modified: llvm/branches/non-call-eh/lib/Transforms/Instrumentation/BlockProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Instrumentation/BlockProfiling.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Instrumentation/BlockProfiling.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Instrumentation/BlockProfiling.cpp Sun Jul 6 15:45:41 2008
@@ -36,14 +36,14 @@
static char ID;
bool runOnModule(Module &M);
};
+}
- char FunctionProfiler::ID = 0;
-
- RegisterPass<FunctionProfiler> X("insert-function-profiling",
- "Insert instrumentation for function profiling");
- RegisterAnalysisGroup<RSProfilers> XG(X);
+char FunctionProfiler::ID = 0;
-}
+static RegisterPass<FunctionProfiler>
+X("insert-function-profiling",
+ "Insert instrumentation for function profiling");
+static RegisterAnalysisGroup<RSProfilers> XG(X);
ModulePass *llvm::createFunctionProfilerPass() {
return new FunctionProfiler();
@@ -86,13 +86,13 @@
public:
static char ID;
};
-
- char BlockProfiler::ID = 0;
- RegisterPass<BlockProfiler> Y("insert-block-profiling",
- "Insert instrumentation for block profiling");
- RegisterAnalysisGroup<RSProfilers> YG(Y);
}
+char BlockProfiler::ID = 0;
+static RegisterPass<BlockProfiler>
+Y("insert-block-profiling", "Insert instrumentation for block profiling");
+static RegisterAnalysisGroup<RSProfilers> YG(Y);
+
ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); }
bool BlockProfiler::runOnModule(Module &M) {
Modified: llvm/branches/non-call-eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Instrumentation/EdgeProfiling.cpp Sun Jul 6 15:45:41 2008
@@ -36,12 +36,12 @@
static char ID; // Pass identification, replacement for typeid
EdgeProfiler() : ModulePass((intptr_t)&ID) {}
};
-
- char EdgeProfiler::ID = 0;
- RegisterPass<EdgeProfiler> X("insert-edge-profiling",
- "Insert instrumentation for edge profiling");
}
+char EdgeProfiler::ID = 0;
+static RegisterPass<EdgeProfiler>
+X("insert-edge-profiling", "Insert instrumentation for edge profiling");
+
ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
bool EdgeProfiler::runOnModule(Module &M) {
Modified: llvm/branches/non-call-eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Instrumentation/ProfilingUtils.cpp Sun Jul 6 15:45:41 2008
@@ -68,7 +68,7 @@
Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
false);
InitCall->setOperand(2,
- CastInst::create(opcode, AI, ArgVTy, "argv.cast", InitCall));
+ CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
} else {
InitCall->setOperand(2, AI);
}
@@ -83,11 +83,11 @@
if (!AI->use_empty()) {
opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
AI->replaceAllUsesWith(
- CastInst::create(opcode, InitCall, AI->getType(), "", InsertPos));
+ CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
}
opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true);
InitCall->setOperand(1,
- CastInst::create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall));
+ CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall));
} else {
AI->replaceAllUsesWith(InitCall);
InitCall->setOperand(1, AI);
@@ -100,8 +100,8 @@
void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
GlobalValue *CounterArray) {
// Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = BB->begin();
- while (isa<AllocaInst>(InsertPos) || isa<PHINode>(InsertPos))
+ BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+ while (isa<AllocaInst>(InsertPos))
++InsertPos;
// Create the getelementptr constant expression
@@ -113,7 +113,7 @@
// Load, increment and store the value back.
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
- Value *NewVal = BinaryOperator::create(Instruction::Add, OldVal,
+ Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
ConstantInt::get(Type::Int32Ty, 1),
"NewFuncCounter", InsertPos);
new StoreInst(NewVal, ElementPtr, InsertPos);
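
The insertion-point change in this file (and repeated in RSProfiling below)
relies on PHI nodes always forming a contiguous prefix of their block, so
both forms land on the same instruction. A sketch, assuming a
BasicBlock *BB in scope:

  BasicBlock::iterator IP = BB->begin();            // old form: skip PHIs
  while (isa<AllocaInst>(IP) || isa<PHINode>(IP))   // and allocas by hand
    ++IP;

  BasicBlock::iterator IP2 = BB->getFirstNonPHI();  // new form: PHIs are a
  while (isa<AllocaInst>(IP2))                      // prefix, so only the
    ++IP2;                                          // leading allocas remain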
Modified: llvm/branches/non-call-eh/lib/Transforms/Instrumentation/RSProfiling.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Instrumentation/RSProfiling.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Instrumentation/RSProfiling.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Instrumentation/RSProfiling.cpp Sun Jul 6 15:45:41 2008
@@ -55,16 +55,18 @@
enum RandomMeth {
GBV, GBVO, HOSTCC
};
+}
- cl::opt<RandomMeth> RandomMethod("profile-randomness",
- cl::desc("How to randomly choose to profile:"),
- cl::values(
- clEnumValN(GBV, "global", "global counter"),
- clEnumValN(GBVO, "ra_global",
- "register allocated global counter"),
- clEnumValN(HOSTCC, "rdcc", "cycle counter"),
- clEnumValEnd));
+static cl::opt<RandomMeth> RandomMethod("profile-randomness",
+ cl::desc("How to randomly choose to profile:"),
+ cl::values(
+ clEnumValN(GBV, "global", "global counter"),
+ clEnumValN(GBVO, "ra_global",
+ "register allocated global counter"),
+ clEnumValN(HOSTCC, "rdcc", "cycle counter"),
+ clEnumValEnd));
+namespace {
/// NullProfilerRS - The basic profiler that does nothing. It is the default
/// profiler and thus terminates RSProfiler chains. It is useful for
/// measuring framework overhead
@@ -81,12 +83,14 @@
AU.setPreservesAll();
}
};
+}
- static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
- static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
- "Measure profiling framework overhead");
- static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
+static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
+static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
+ "Measure profiling framework overhead");
+static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
+namespace {
/// Chooser - Something that chooses when to make a sample of the profiled code
class VISIBILITY_HIDDEN Chooser {
public:
@@ -158,11 +162,12 @@
bool doInitialization(Module &M);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
};
-
- RegisterPass<ProfilerRS> X("insert-rs-profiling-framework",
- "Insert random sampling instrumentation framework");
}
+static RegisterPass<ProfilerRS>
+X("insert-rs-profiling-framework",
+ "Insert random sampling instrumentation framework");
+
char RSProfilers::ID = 0;
char NullProfilerRS::ID = 0;
char ProfilerRS::ID = 0;
@@ -210,7 +215,7 @@
ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
"countercc", t);
- Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
+ Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
"counternew", t);
new StoreInst(nv, Counter, t);
t->setCondition(s);
@@ -260,14 +265,11 @@
new StoreInst(l, Counter, bib);
BasicBlock* bb = cast<InvokeInst>(bib)->getNormalDest();
- BasicBlock::iterator i = bb->begin();
- while (isa<PHINode>(i))
- ++i;
+ BasicBlock::iterator i = bb->getFirstNonPHI();
l = new LoadInst(Counter, "counter", i);
bb = cast<InvokeInst>(bib)->getUnwindDest();
- i = bb->begin();
- while (isa<PHINode>(i)) ++i;
+ i = bb->getFirstNonPHI();
l = new LoadInst(Counter, "counter", i);
new StoreInst(l, AI, i);
} else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
@@ -285,7 +287,7 @@
ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
"countercc", t);
- Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
+ Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
"counternew", t);
new StoreInst(nv, AI, t);
t->setCondition(s);
@@ -314,7 +316,7 @@
CallInst* c = CallInst::Create(F, "rdcc", t);
BinaryOperator* b =
- BinaryOperator::createAnd(c, ConstantInt::get(Type::Int64Ty, rm),
+ BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm),
"mrdcc", t);
ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b,
@@ -338,8 +340,8 @@
void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
GlobalValue *CounterArray) {
// Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = BB->begin();
- while (isa<AllocaInst>(InsertPos) || isa<PHINode>(InsertPos))
+ BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+ while (isa<AllocaInst>(InsertPos))
++InsertPos;
// Create the getelementptr constant expression
@@ -352,7 +354,7 @@
// Load, increment and store the value back.
Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
profcode.insert(OldVal);
- Value *NewVal = BinaryOperator::createAdd(OldVal,
+ Value *NewVal = BinaryOperator::CreateAdd(OldVal,
ConstantInt::get(Type::Int32Ty, 1),
"NewCounter", InsertPos);
profcode.insert(NewVal);
@@ -376,8 +378,8 @@
if (bb == &bb->getParent()->getEntryBlock())
TransCache[bb] = bb; //don't translate entry block
else
- TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(), bb->getParent(),
- NULL);
+ TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(),
+ bb->getParent(), NULL);
return TransCache[bb];
} else if (Instruction* i = dyn_cast<Instruction>(v)) {
//we have already translated this
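
The other churn in these instrumentation files is the mechanical rename of
the lower-case factory methods (BinaryOperator::create*, CastInst::create,
CmpInst::create) to the capitalized Create* spelling. For example, assuming
Value *L, *R and an insertion point InsertPt:

  Value *Sum = BinaryOperator::createAdd(L, R, "sum", InsertPt);   // old
  Value *Sum2 = BinaryOperator::CreateAdd(L, R, "sum", InsertPt);  // new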
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/ADCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/ADCE.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/ADCE.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/ADCE.cpp Sun Jul 6 15:45:41 2008
@@ -1,4 +1,4 @@
-//===- ADCE.cpp - Code to perform aggressive dead code elimination --------===//
+//===- DCE.cpp - Code to perform dead code elimination --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,464 +7,91 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements "aggressive" dead code elimination. ADCE is DCe where
-// values are assumed to be dead until proven otherwise. This is similar to
-// SCCP, except applied to the liveness of values.
+// This file implements the Aggressive Dead Code Elimination pass. This pass
+// optimistically assumes that all instructions are dead until proven otherwise,
+// allowing it to eliminate dead computations that other DCE passes do not
+// catch, particularly involving loop computations.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "adce"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
+#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
-#include <algorithm>
-using namespace llvm;
-
-STATISTIC(NumBlockRemoved, "Number of basic blocks removed");
-STATISTIC(NumInstRemoved , "Number of instructions removed");
-STATISTIC(NumCallRemoved , "Number of calls removed");
-
-namespace {
-//===----------------------------------------------------------------------===//
-// ADCE Class
-//
-// This class does all of the work of Aggressive Dead Code Elimination.
-// It's public interface consists of a constructor and a doADCE() method.
-//
-class VISIBILITY_HIDDEN ADCE : public FunctionPass {
- Function *Func; // The function that we are working on
- std::vector<Instruction*> WorkList; // Instructions that just became live
- std::set<Instruction*> LiveSet; // The set of live instructions
-
- //===--------------------------------------------------------------------===//
- // The public interface for this class
- //
-public:
- static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass((intptr_t)&ID) {}
- // Execute the Aggressive Dead Code Elimination Algorithm
- //
- virtual bool runOnFunction(Function &F) {
- Func = &F;
- bool Changed = doADCE();
- assert(WorkList.empty());
- LiveSet.clear();
- return Changed;
- }
- // getAnalysisUsage - We require post dominance frontiers (aka Control
- // Dependence Graph)
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- // We require that all function nodes are unified, because otherwise code
- // can be marked live that wouldn't necessarily be otherwise.
- AU.addRequired<UnifyFunctionExitNodes>();
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<PostDominatorTree>();
- AU.addRequired<PostDominanceFrontier>();
- }
-
-
- //===--------------------------------------------------------------------===//
- // The implementation of this class
- //
-private:
- // doADCE() - Run the Aggressive Dead Code Elimination algorithm, returning
- // true if the function was modified.
- //
- bool doADCE();
-
- void markBlockAlive(BasicBlock *BB);
-
-
- // deleteDeadInstructionsInLiveBlock - Loop over all of the instructions in
- // the specified basic block, deleting ones that are dead according to
- // LiveSet.
- bool deleteDeadInstructionsInLiveBlock(BasicBlock *BB);
-
- TerminatorInst *convertToUnconditionalBranch(TerminatorInst *TI);
-
- inline void markInstructionLive(Instruction *I) {
- if (!LiveSet.insert(I).second) return;
- DOUT << "Insn Live: " << *I;
- WorkList.push_back(I);
- }
- inline void markTerminatorLive(const BasicBlock *BB) {
- DOUT << "Terminator Live: " << *BB->getTerminator();
- markInstructionLive(const_cast<TerminatorInst*>(BB->getTerminator()));
- }
-};
-
- char ADCE::ID = 0;
- RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination");
-} // End of anonymous namespace
-
-FunctionPass *llvm::createAggressiveDCEPass() { return new ADCE(); }
-
-void ADCE::markBlockAlive(BasicBlock *BB) {
- // Mark the basic block as being newly ALIVE... and mark all branches that
- // this block is control dependent on as being alive also...
- //
- PostDominanceFrontier &CDG = getAnalysis<PostDominanceFrontier>();
-
- PostDominanceFrontier::const_iterator It = CDG.find(BB);
- if (It != CDG.end()) {
- // Get the blocks that this node is control dependent on...
- const PostDominanceFrontier::DomSetType &CDB = It->second;
- for (PostDominanceFrontier::DomSetType::const_iterator I =
- CDB.begin(), E = CDB.end(); I != E; ++I)
- markTerminatorLive(*I); // Mark all their terminators as live
- }
-
- // If this basic block is live, and it ends in an unconditional branch, then
- // the branch is alive as well...
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isUnconditional())
- markTerminatorLive(BB);
-}
+using namespace llvm;
-// deleteDeadInstructionsInLiveBlock - Loop over all of the instructions in the
-// specified basic block, deleting ones that are dead according to LiveSet.
-bool ADCE::deleteDeadInstructionsInLiveBlock(BasicBlock *BB) {
- bool Changed = false;
- for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E; ) {
- Instruction *I = II++;
- if (!LiveSet.count(I)) { // Is this instruction alive?
- if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+STATISTIC(NumRemoved, "Number of instructions removed");
- // Nope... remove the instruction from it's basic block...
- if (isa<CallInst>(I))
- ++NumCallRemoved;
- else
- ++NumInstRemoved;
- BB->getInstList().erase(I);
- Changed = true;
+namespace {
+ struct VISIBILITY_HIDDEN ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnFunction(Function& F);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.setPreservesCFG();
}
- }
- return Changed;
+
+ };
}
+char ADCE::ID = 0;
+static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination");
-/// convertToUnconditionalBranch - Transform this conditional terminator
-/// instruction into an unconditional branch because we don't care which of the
-/// successors it goes to. This eliminate a use of the condition as well.
-///
-TerminatorInst *ADCE::convertToUnconditionalBranch(TerminatorInst *TI) {
- BranchInst *NB = BranchInst::Create(TI->getSuccessor(0), TI);
- BasicBlock *BB = TI->getParent();
-
- // Remove entries from PHI nodes to avoid confusing ourself later...
- for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
- TI->getSuccessor(i)->removePredecessor(BB);
-
- // Delete the old branch itself...
- BB->getInstList().erase(TI);
- return NB;
-}
-
-
-// doADCE() - Run the Aggressive Dead Code Elimination algorithm, returning
-// true if the function was modified.
-//
-bool ADCE::doADCE() {
- bool MadeChanges = false;
-
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
- // Iterate over all of the instructions in the function, eliminating trivially
- // dead instructions, and marking instructions live that are known to be
- // needed. Perform the walk in depth first order so that we avoid marking any
- // instructions live in basic blocks that are unreachable. These blocks will
- // be eliminated later, along with the instructions inside.
- //
- std::set<BasicBlock*> ReachableBBs;
- for (df_ext_iterator<BasicBlock*>
- BBI = df_ext_begin(&Func->front(), ReachableBBs),
- BBE = df_ext_end(&Func->front(), ReachableBBs); BBI != BBE; ++BBI) {
- BasicBlock *BB = *BBI;
- for (BasicBlock::iterator II = BB->begin(), EI = BB->end(); II != EI; ) {
- Instruction *I = II++;
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (AA.onlyReadsMemory(CI)) {
- if (CI->use_empty()) {
- BB->getInstList().erase(CI);
- ++NumCallRemoved;
- }
- } else {
- markInstructionLive(I);
- }
- } else if (I->mayWriteToMemory() || isa<ReturnInst>(I) ||
- isa<UnwindInst>(I) || isa<UnreachableInst>(I)) {
- // FIXME: Unreachable instructions should not be marked intrinsically
- // live here.
- markInstructionLive(I);
- } else if (isInstructionTriviallyDead(I)) {
- // Remove the instruction from it's basic block...
- BB->getInstList().erase(I);
- ++NumInstRemoved;
- }
- }
- }
-
- // Check to ensure we have an exit node for this CFG. If we don't, we won't
- // have any post-dominance information, thus we cannot perform our
- // transformations safely.
- //
- PostDominatorTree &DT = getAnalysis<PostDominatorTree>();
- if (DT[&Func->getEntryBlock()] == 0) {
- WorkList.clear();
- return MadeChanges;
- }
-
- // Scan the function marking blocks without post-dominance information as
- // live. Blocks without post-dominance information occur when there is an
- // infinite loop in the program. Because the infinite loop could contain a
- // function which unwinds, exits or has side-effects, we don't want to delete
- // the infinite loop or those blocks leading up to it.
- for (Function::iterator I = Func->begin(), E = Func->end(); I != E; ++I)
- if (DT[I] == 0 && ReachableBBs.count(I))
- for (pred_iterator PI = pred_begin(I), E = pred_end(I); PI != E; ++PI)
- markInstructionLive((*PI)->getTerminator());
-
- DOUT << "Processing work list\n";
-
- // AliveBlocks - Set of basic blocks that we know have instructions that are
- // alive in them...
- //
- std::set<BasicBlock*> AliveBlocks;
-
- // Process the work list of instructions that just became live... if they
- // became live, then that means that all of their operands are necessary as
- // well... make them live as well.
- //
- while (!WorkList.empty()) {
- Instruction *I = WorkList.back(); // Get an instruction that became live...
- WorkList.pop_back();
-
- BasicBlock *BB = I->getParent();
- if (!ReachableBBs.count(BB)) continue;
- if (AliveBlocks.insert(BB).second) // Basic block not alive yet.
- markBlockAlive(BB); // Make it so now!
-
- // PHI nodes are a special case, because the incoming values are actually
- // defined in the predecessor nodes of this block, meaning that the PHI
- // makes the predecessors alive.
- //
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- // If the incoming edge is clearly dead, it won't have control
- // dependence information. Do not mark it live.
- BasicBlock *PredBB = PN->getIncomingBlock(i);
- if (ReachableBBs.count(PredBB)) {
- // FIXME: This should mark the control dependent edge as live, not
- // necessarily the predecessor itself!
- if (AliveBlocks.insert(PredBB).second)
- markBlockAlive(PN->getIncomingBlock(i)); // Block is newly ALIVE!
- if (Instruction *Op = dyn_cast<Instruction>(PN->getIncomingValue(i)))
- markInstructionLive(Op);
- }
- }
- } else {
- // Loop over all of the operands of the live instruction, making sure that
- // they are known to be alive as well.
- //
- for (unsigned op = 0, End = I->getNumOperands(); op != End; ++op)
- if (Instruction *Operand = dyn_cast<Instruction>(I->getOperand(op)))
- markInstructionLive(Operand);
- }
- }
-
- DEBUG(
- DOUT << "Current Function: X = Live\n";
- for (Function::iterator I = Func->begin(), E = Func->end(); I != E; ++I){
- DOUT << I->getName() << ":\t"
- << (AliveBlocks.count(I) ? "LIVE\n" : "DEAD\n");
- for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE; ++BI){
- if (LiveSet.count(BI)) DOUT << "X ";
- DOUT << *BI;
- }
- });
-
- // All blocks being live is a common case, handle it specially.
- if (AliveBlocks.size() == Func->size()) { // No dead blocks?
- for (Function::iterator I = Func->begin(), E = Func->end(); I != E; ++I) {
- // Loop over all of the instructions in the function deleting instructions
- // to drop their references.
- deleteDeadInstructionsInLiveBlock(I);
-
- // Check to make sure the terminator instruction is live. If it isn't,
- // this means that the condition that it branches on (we know it is not an
- // unconditional branch), is not needed to make the decision of where to
- // go to, because all outgoing edges go to the same place. We must remove
- // the use of the condition (because it's probably dead), so we convert
- // the terminator to an unconditional branch.
- //
- TerminatorInst *TI = I->getTerminator();
- if (!LiveSet.count(TI))
- convertToUnconditionalBranch(TI);
- }
-
- return MadeChanges;
- }
-
-
- // If the entry node is dead, insert a new entry node to eliminate the entry
- // node as a special case.
- //
- if (!AliveBlocks.count(&Func->front())) {
- BasicBlock *NewEntry = BasicBlock::Create();
- BranchInst::Create(&Func->front(), NewEntry);
- Func->getBasicBlockList().push_front(NewEntry);
- AliveBlocks.insert(NewEntry); // This block is always alive!
- LiveSet.insert(NewEntry->getTerminator()); // The branch is live
+bool ADCE::runOnFunction(Function& F) {
+ SmallPtrSet<Instruction*, 128> alive;
+ SmallVector<Instruction*, 128> worklist;
+
+ // Collect the set of "root" instructions that are known live.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (isa<TerminatorInst>(I.getInstructionIterator()) ||
+ I->mayWriteToMemory()) {
+ alive.insert(I.getInstructionIterator());
+ worklist.push_back(I.getInstructionIterator());
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!worklist.empty()) {
+ Instruction* curr = worklist.back();
+ worklist.pop_back();
+
+ for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
+ OI != OE; ++OI)
+ if (Instruction* Inst = dyn_cast<Instruction>(OI))
+ if (alive.insert(Inst))
+ worklist.push_back(Inst);
+ }
+
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the worklist vector here for memory efficiency.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (!alive.count(I.getInstructionIterator())) {
+ worklist.push_back(I.getInstructionIterator());
+ I->dropAllReferences();
+ }
+
+  for (SmallVector<Instruction*, 128>::iterator I = worklist.begin(),
+ E = worklist.end(); I != E; ++I) {
+ NumRemoved++;
+ (*I)->eraseFromParent();
}
+
+ return !worklist.empty();
+}
- // Loop over all of the alive blocks in the function. If any successor
- // blocks are not alive, we adjust the outgoing branches to branch to the
- // first live postdominator of the live block, adjusting any PHI nodes in
- // the block to reflect this.
- //
- for (Function::iterator I = Func->begin(), E = Func->end(); I != E; ++I)
- if (AliveBlocks.count(I)) {
- BasicBlock *BB = I;
- TerminatorInst *TI = BB->getTerminator();
-
- // If the terminator instruction is alive, but the block it is contained
- // in IS alive, this means that this terminator is a conditional branch on
- // a condition that doesn't matter. Make it an unconditional branch to
- // ONE of the successors. This has the side effect of dropping a use of
- // the conditional value, which may also be dead.
- if (!LiveSet.count(TI))
- TI = convertToUnconditionalBranch(TI);
-
- // Loop over all of the successors, looking for ones that are not alive.
- // We cannot save the number of successors in the terminator instruction
- // here because we may remove them if we don't have a postdominator.
- //
- for (unsigned i = 0; i != TI->getNumSuccessors(); ++i)
- if (!AliveBlocks.count(TI->getSuccessor(i))) {
- // Scan up the postdominator tree, looking for the first
- // postdominator that is alive, and the last postdominator that is
- // dead...
- //
- DomTreeNode *LastNode = DT[TI->getSuccessor(i)];
- DomTreeNode *NextNode = 0;
-
- if (LastNode) {
- NextNode = LastNode->getIDom();
- while (!AliveBlocks.count(NextNode->getBlock())) {
- LastNode = NextNode;
- NextNode = NextNode->getIDom();
- if (NextNode == 0) {
- LastNode = 0;
- break;
- }
- }
- }
-
- // There is a special case here... if there IS no post-dominator for
- // the block we have nowhere to point our branch to. Instead, convert
- // it to a return. This can only happen if the code branched into an
- // infinite loop. Note that this may not be desirable, because we
- // _are_ altering the behavior of the code. This is a well known
- // drawback of ADCE, so in the future if we choose to revisit the
- // decision, this is where it should be.
- //
- if (LastNode == 0) { // No postdominator!
- if (!isa<InvokeInst>(TI)) {
- // Call RemoveSuccessor to transmogrify the terminator instruction
- // to not contain the outgoing branch, or to create a new
- // terminator if the form fundamentally changes (i.e.,
- // unconditional branch to return). Note that this will change a
- // branch into an infinite loop into a return instruction!
- //
- RemoveSuccessor(TI, i);
-
- // RemoveSuccessor may replace TI... make sure we have a fresh
- // pointer.
- //
- TI = BB->getTerminator();
-
- // Rescan this successor...
- --i;
- } else {
-
- }
- } else {
- // Get the basic blocks that we need...
- BasicBlock *LastDead = LastNode->getBlock();
- BasicBlock *NextAlive = NextNode->getBlock();
-
- // Make the conditional branch now go to the next alive block...
- TI->getSuccessor(i)->removePredecessor(BB);
- TI->setSuccessor(i, NextAlive);
-
- // If there are PHI nodes in NextAlive, we need to add entries to
- // the PHI nodes for the new incoming edge. The incoming values
- // should be identical to the incoming values for LastDead.
- //
- for (BasicBlock::iterator II = NextAlive->begin();
- isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- if (LiveSet.count(PN)) { // Only modify live phi nodes
- // Get the incoming value for LastDead...
- int OldIdx = PN->getBasicBlockIndex(LastDead);
- assert(OldIdx != -1 &&"LastDead is not a pred of NextAlive!");
- Value *InVal = PN->getIncomingValue(OldIdx);
-
- // Add an incoming value for BB now...
- PN->addIncoming(InVal, BB);
- }
- }
- }
- }
-
- // Now loop over all of the instructions in the basic block, deleting
- // dead instructions. This is so that the next sweep over the program
- // can safely delete dead instructions without other dead instructions
- // still referring to them.
- //
- deleteDeadInstructionsInLiveBlock(BB);
- }
-
- // Loop over all of the basic blocks in the function, dropping references of
- // the dead basic blocks. We must do this after the previous step to avoid
- // dropping references to PHIs which still have entries...
- //
- std::vector<BasicBlock*> DeadBlocks;
- for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB)
- if (!AliveBlocks.count(BB)) {
- // Remove PHI node entries for this block in live successor blocks.
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
- if (!SI->empty() && isa<PHINode>(SI->front()) && AliveBlocks.count(*SI))
- (*SI)->removePredecessor(BB);
-
- BB->dropAllReferences();
- MadeChanges = true;
- DeadBlocks.push_back(BB);
- }
-
- NumBlockRemoved += DeadBlocks.size();
-
- // Now loop through all of the blocks and delete the dead ones. We can safely
- // do this now because we know that there are no references to dead blocks
- // (because they have dropped all of their references).
- for (std::vector<BasicBlock*>::iterator I = DeadBlocks.begin(),
- E = DeadBlocks.end(); I != E; ++I)
- Func->getBasicBlockList().erase(*I);
-
- return MadeChanges;
+FunctionPass *llvm::createAggressiveDCEPass() {
+ return new ADCE();
}
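
To see what the rewritten pass buys, consider a hypothetical input like:

  // int f(int n) {
  //   int dead = 0;
  //   for (int i = 0; i < n; ++i)
  //     dead += i;               // result never used
  //   return n;
  // }

The roots are the terminators (the branches and the ret). Liveness reaches
the induction variable through the branch condition 'i < n', but nothing
reaches the 'dead' accumulator or its phi, so both are deleted. Because the
pass only removes instructions and declares setPreservesCFG(), the emptied
loop's control flow remains; cleaning that up is left to other passes.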
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/BasicBlockPlacement.cpp Sun Jul 6 15:45:41 2008
@@ -72,12 +72,12 @@
/// successors.
void PlaceBlocks(BasicBlock *BB);
};
-
- char BlockPlacement::ID = 0;
- RegisterPass<BlockPlacement> X("block-placement",
- "Profile Guided Basic Block Placement");
}
+char BlockPlacement::ID = 0;
+static RegisterPass<BlockPlacement>
+X("block-placement", "Profile Guided Basic Block Placement");
+
FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
bool BlockPlacement::runOnFunction(Function &F) {
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/CodeGenPrepare.cpp Sun Jul 6 15:45:41 2008
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
-// SelectionDAG-based code generation. This works around limitations in it's
-// basic-block-at-a-time approach. It should eventually be removed.
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//
@@ -333,16 +333,16 @@
/// Return true if any changes are made.
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If this is a noop copy,
- MVT::ValueType SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
- MVT::ValueType DstVT = TLI.getValueType(CI->getType());
+ MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(CI->getType());
// This is an fp<->int conversion?
- if (MVT::isInteger(SrcVT) != MVT::isInteger(DstVT))
+ if (SrcVT.isInteger() != DstVT.isInteger())
return false;
-
+
// If this is an extension, it will be a zero or sign extension, which
// isn't a noop.
- if (SrcVT < DstVT) return false;
+ if (SrcVT.bitsLT(DstVT)) return false;
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
@@ -385,11 +385,10 @@
CastInst *&InsertedCast = InsertedCasts[UserBB];
if (!InsertedCast) {
- BasicBlock::iterator InsertPt = UserBB->begin();
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
InsertedCast =
- CastInst::create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
+ CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
MadeChange = true;
}
@@ -443,11 +442,10 @@
CmpInst *&InsertedCmp = InsertedCmps[UserBB];
if (!InsertedCmp) {
- BasicBlock::iterator InsertPt = UserBB->begin();
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
InsertedCmp =
- CmpInst::create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
+ CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
MadeChange = true;
}
@@ -483,6 +481,7 @@
}
}
+namespace {
/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode which
/// holds actual Value*'s for register values.
@@ -517,6 +516,8 @@
cerr << *this << "\n";
}
+}
+
static bool TryMatchingScaledValue(Value *ScaleReg, int64_t Scale,
const Type *AccessTy, ExtAddrMode &AddrMode,
SmallVector<Instruction*, 16> &AddrModeInsts,
@@ -877,7 +878,7 @@
V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
}
if (AddrMode.Scale != 1)
- V = BinaryOperator::createMul(V, ConstantInt::get(IntPtrTy,
+ V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
AddrMode.Scale),
"sunkaddr", InsertPt);
Result = V;
@@ -889,7 +890,7 @@
if (V->getType() != IntPtrTy)
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
if (Result)
- Result = BinaryOperator::createAdd(Result, V, "sunkaddr", InsertPt);
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
@@ -899,7 +900,7 @@
Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
InsertPt);
if (Result)
- Result = BinaryOperator::createAdd(Result, V, "sunkaddr", InsertPt);
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
@@ -908,7 +909,7 @@
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
- Result = BinaryOperator::createAdd(Result, V, "sunkaddr", InsertPt);
+ Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
@@ -962,7 +963,7 @@
}
// Compute the constraint code and ConstraintType to use.
- OpInfo.ComputeConstraintToUse(*TLI);
+ TLI->ComputeConstraintToUse(OpInfo, SDOperand());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
@@ -1036,8 +1037,7 @@
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {
- BasicBlock::iterator InsertPt = UserBB->begin();
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+ BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
}
@@ -1127,15 +1127,6 @@
// Sink address computing for memory operands into the block.
MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
}
- } else if (GetResultInst *GRI = dyn_cast<GetResultInst>(I)) {
- // Ensure that all getresult instructions live in the same basic block
- // as their associated struct-value instructions. Codegen requires
- // this, as lowering only works on one basic block at a time.
- if (Instruction *Agg = dyn_cast<Instruction>(GRI->getAggregateValue())) {
- BasicBlock *AggBB = Agg->getParent();
- if (AggBB != GRI->getParent())
- GRI->moveBefore(AggBB->getTerminator());
- }
}
}
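
The address-mode sinking above works from ExtAddrMode, which describes an
address of the form

  //   addr = BaseReg + Scale*ScaledReg + BaseGV + BaseOffs

and materializes that sum immediately before the memory instruction. With
Scale == 4 and BaseOffs == 8, for instance, the sunk sequence is roughly:

  //   t0 = ptrtoint (or sext) ScaledReg      ; to the pointer-sized integer
  //   t1 = mul t0, 4                         ; BinaryOperator::CreateMul
  //   t2 = add t1, ptrtoint BaseReg          ; BinaryOperator::CreateAdd
  //   t3 = add t2, 8
  //   addr = inttoptr t3                     ; the "sunkaddr" fed to the
  //                                          ; load or store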
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/CondPropagate.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/CondPropagate.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/CondPropagate.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/CondPropagate.cpp Sun Jul 6 15:45:41 2008
@@ -48,10 +48,10 @@
void SimplifyPredecessors(SwitchInst *SI);
void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB);
};
-
- char CondProp::ID = 0;
- RegisterPass<CondProp> X("condprop", "Conditional Propagation");
}
+
+char CondProp::ID = 0;
+static RegisterPass<CondProp> X("condprop", "Conditional Propagation");
FunctionPass *llvm::createCondPropagationPass() {
return new CondProp();
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/ConstantProp.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/ConstantProp.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/ConstantProp.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/ConstantProp.cpp Sun Jul 6 15:45:41 2008
@@ -43,12 +43,12 @@
AU.setPreservesCFG();
}
};
-
- char ConstantPropagation::ID = 0;
- RegisterPass<ConstantPropagation> X("constprop",
- "Simple constant propagation");
}
+char ConstantPropagation::ID = 0;
+static RegisterPass<ConstantPropagation>
+X("constprop", "Simple constant propagation");
+
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
}
@@ -79,7 +79,7 @@
// Remove the dead instruction.
WorkList.erase(I);
- I->getParent()->getInstList().erase(I);
+ I->eraseFromParent();
// We made a change to the function...
Changed = true;
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/DCE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/DCE.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/DCE.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/DCE.cpp Sun Jul 6 15:45:41 2008
@@ -52,11 +52,12 @@
AU.setPreservesCFG();
}
};
-
- char DeadInstElimination::ID = 0;
- RegisterPass<DeadInstElimination> X("die", "Dead Instruction Elimination");
}
+char DeadInstElimination::ID = 0;
+static RegisterPass<DeadInstElimination>
+X("die", "Dead Instruction Elimination");
+
Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
}
@@ -76,11 +77,11 @@
AU.setPreservesCFG();
}
};
-
- char DCE::ID = 0;
- RegisterPass<DCE> Y("dce", "Dead Code Elimination");
}
+char DCE::ID = 0;
+static RegisterPass<DCE> Y("dce", "Dead Code Elimination");
+
bool DCE::runOnFunction(Function &F) {
// Start out with all of the instructions in the worklist...
std::vector<Instruction*> WorkList;
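
The excerpt stops short of the loop body; for reference, the classic "dce"
worklist scheme it seeds is sketched below (reconstructed, so details in the
actual file may differ; isInstructionTriviallyDead comes from
llvm/Transforms/Utils/Local.h, and std::remove from <algorithm>):

  while (!WorkList.empty()) {
    Instruction *I = WorkList.back();
    WorkList.pop_back();
    if (isInstructionTriviallyDead(I)) {   // no uses, no side effects
      // Operands may become dead once I is gone; revisit them.
      for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
        if (Instruction *Op = dyn_cast<Instruction>(*OI))
          WorkList.push_back(Op);
      I->eraseFromParent();
      // Purge any stale copies of I before they can be revisited.
      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
                     WorkList.end());
    }
  }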
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/DeadStoreElimination.cpp Sun Jul 6 15:45:41 2008
@@ -92,10 +92,11 @@
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
- char DSE::ID = 0;
- RegisterPass<DSE> X("dse", "Dead Store Elimination");
}
+char DSE::ID = 0;
+static RegisterPass<DSE> X("dse", "Dead Store Elimination");
+
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -326,9 +327,9 @@
// If we encounter a use of the pointer, it is no longer considered dead
if (LoadInst* L = dyn_cast<LoadInst>(BBI)) {
- // However, if this load is unused, we can go ahead and remove it, and
- // not have to worry about it making our pointer undead!
- if (L->use_empty()) {
+ // However, if this load is unused and not volatile, we can go ahead and
+ // remove it, and not have to worry about it making our pointer undead!
+ if (L->use_empty() && !L->isVolatile()) {
MD.removeInstruction(L);
// DCE instructions only used to calculate that load
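
The extra isVolatile() check matters because for a volatile load the access
itself is the observable effect; an unused result is not enough to delete it.
For example, with an illustrative memory-mapped register:

  volatile int *Port = (volatile int *)0x40001000;   // hypothetical address
  int unused = *Port;   // result unused, but the volatile read must stay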
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/GCSE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/GCSE.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/GCSE.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/GCSE.cpp Sun Jul 6 15:45:41 2008
@@ -9,9 +9,12 @@
//
// This pass is designed to be a very quick global transformation that
// eliminates global common subexpressions from a function. It does this by
-// using an existing value numbering implementation to identify the common
+// using an existing value numbering analysis pass to identify the common
// subexpressions, eliminating them when possible.
//
+// This pass is superseded by the Global Value Numbering pass (which does a
+// better job with its own value numbering).
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "gcse"
@@ -52,11 +55,12 @@
AU.addRequired<ValueNumbering>();
}
};
-
- char GCSE::ID = 0;
- RegisterPass<GCSE> X("gcse", "Global Common Subexpression Elimination");
}
+char GCSE::ID = 0;
+static RegisterPass<GCSE>
+X("gcse", "Global Common Subexpression Elimination");
+
// createGCSEPass - The public interface to this file...
FunctionPass *llvm::createGCSEPass() { return new GCSE(); }
@@ -197,5 +201,5 @@
}
// Erase the instruction from the program.
- I->getParent()->getInstList().erase(I);
+ I->eraseFromParent();
}
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/GVN.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/GVN.cpp Sun Jul 6 15:45:41 2008
@@ -10,6 +10,9 @@
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
//
+// Note that this pass does the value numbering itself, it does not use the
+// ValueNumbering analysis passes.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "gvn"
@@ -18,15 +21,12 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/ParameterAttributes.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -35,13 +35,15 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Target/TargetData.h"
-#include <list>
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+
+static cl::opt<bool> EnablePRE("enable-pre",
+ cl::init(false), cl::Hidden);
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -61,8 +63,8 @@
FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
- PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY,
- TOMBSTONE };
+ PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
+ EMPTY, TOMBSTONE };
ExpressionOpcode opcode;
const Type* type;
@@ -136,6 +138,7 @@
DenseMap<Expression, uint32_t> expressionNumbering;
AliasAnalysis* AA;
MemoryDependenceAnalysis* MD;
+ DominatorTree* DT;
uint32_t nextValueNumber;
@@ -151,6 +154,7 @@
Expression create_expression(CastInst* C);
Expression create_expression(GetElementPtrInst* G);
Expression create_expression(CallInst* C);
+ Expression create_expression(Constant* C);
public:
ValueTable() : nextValueNumber(1) { }
uint32_t lookup_or_add(Value* V);
@@ -161,6 +165,8 @@
unsigned size();
void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
};
}
@@ -228,7 +234,7 @@
}
Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
- if (isa<ICmpInst>(C)) {
+ if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) {
switch (C->getPredicate()) {
default: // THIS SHOULD NEVER HAPPEN
assert(0 && "Comparison with unknown predicate?");
@@ -244,7 +250,7 @@
case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
}
}
- assert(isa<FCmpInst>(C) && "Unknown compare");
+ assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare");
switch (C->getPredicate()) {
default: // THIS SHOULD NEVER HAPPEN
assert(0 && "Comparison with unknown predicate?");
@@ -394,7 +400,7 @@
Expression ValueTable::create_expression(GetElementPtrInst* G) {
Expression e;
-
+
e.firstVN = lookup_or_add(G->getPointerOperand());
e.secondVN = 0;
e.thirdVN = 0;
@@ -413,6 +419,11 @@
// ValueTable External Functions
//===----------------------------------------------------------------------===//
+/// add - Insert a value into the table with a specified value number.
+void ValueTable::add(Value* V, uint32_t num) {
+ valueNumbering.insert(std::make_pair(V, num));
+}
+
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value* V) {
@@ -437,26 +448,97 @@
} else if (AA->onlyReadsMemory(C)) {
Expression e = create_expression(C);
- Instruction* dep = MD->getDependency(C);
-
- if (dep == MemoryDependenceAnalysis::NonLocal ||
- !isa<CallInst>(dep)) {
+ if (expressionNumbering.find(e) == expressionNumbering.end()) {
expressionNumbering.insert(std::make_pair(e, nextValueNumber));
valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+
+ Instruction* local_dep = MD->getDependency(C);
+ if (local_dep == MemoryDependenceAnalysis::None) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
+ } else if (local_dep != MemoryDependenceAnalysis::NonLocal) {
+ if (!isa<CallInst>(local_dep)) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+
+ CallInst* local_cdep = cast<CallInst>(local_dep);
+
+ if (local_cdep->getCalledFunction() != C->getCalledFunction() ||
+ local_cdep->getNumOperands() != C->getNumOperands()) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ } else if (!C->getCalledFunction()) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ } else {
+ for (unsigned i = 1; i < C->getNumOperands(); ++i) {
+ uint32_t c_vn = lookup_or_add(C->getOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(local_cdep);
+ valueNumbering.insert(std::make_pair(V, v));
+ return v;
+ }
}
- CallInst* cdep = cast<CallInst>(dep);
- Expression d_exp = create_expression(cdep);
- if (e != d_exp) {
- expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ DenseMap<BasicBlock*, Value*> deps;
+ MD->getNonLocalDependency(C, deps);
+ CallInst* cdep = 0;
+
+ for (DenseMap<BasicBlock*, Value*>::iterator I = deps.begin(),
+ E = deps.end(); I != E; ++I) {
+ if (I->second == MemoryDependenceAnalysis::None) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ } else if (I->second != MemoryDependenceAnalysis::NonLocal) {
+ if (DT->properlyDominates(I->first, C->getParent())) {
+ if (CallInst* CD = dyn_cast<CallInst>(I->second))
+ cdep = CD;
+ else {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ } else {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ }
+ }
+
+ if (!cdep) {
valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ if (cdep->getCalledFunction() != C->getCalledFunction() ||
+ cdep->getNumOperands() != C->getNumOperands()) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ } else if (!C->getCalledFunction()) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
return nextValueNumber++;
} else {
- uint32_t v = expressionNumbering[d_exp];
+ for (unsigned i = 1; i < C->getNumOperands(); ++i) {
+ uint32_t c_vn = lookup_or_add(C->getOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+ if (c_vn != cd_vn) {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ }
+
+ uint32_t v = lookup_or_add(cdep);
valueNumbering.insert(std::make_pair(V, v));
return v;
}
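
A note on the operand-by-operand check above, since it is the heart of the new call numbering: two read-only calls can share a value number only when they invoke the same known function and every argument's value number matches. A minimal standalone sketch, with hypothetical types standing in for CallInst and the value table (the real code additionally requires a non-clobbering memory dependency, either local or properly dominating):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Call {
      const void* Callee;                // identity of the called function
      std::vector<uint32_t> OperandVNs;  // value numbers of the arguments
    };

    // Mirrors the loop above: equal callee, equal arity, and pairwise
    // equal operand value numbers.
    bool mayShareValueNumber(const Call& A, const Call& B) {
      if (!A.Callee || A.Callee != B.Callee) return false;
      if (A.OperandVNs.size() != B.OperandVNs.size()) return false;
      for (size_t i = 0; i != A.OperandVNs.size(); ++i)
        if (A.OperandVNs[i] != B.OperandVNs[i]) return false;
      return true;
    }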
@@ -596,53 +678,29 @@
}
//===----------------------------------------------------------------------===//
-// ValueNumberedSet Class
+// GVN Pass
//===----------------------------------------------------------------------===//
-namespace {
-class VISIBILITY_HIDDEN ValueNumberedSet {
- private:
- SmallPtrSet<Value*, 8> contents;
- SparseBitVector<64> numbers;
- public:
- ValueNumberedSet() { }
- ValueNumberedSet(const ValueNumberedSet& other) {
- numbers = other.numbers;
- contents = other.contents;
- }
-
- typedef SmallPtrSet<Value*, 8>::iterator iterator;
-
- iterator begin() { return contents.begin(); }
- iterator end() { return contents.end(); }
-
- bool insert(Value* v) { return contents.insert(v); }
- void insert(iterator I, iterator E) { contents.insert(I, E); }
- void erase(Value* v) { contents.erase(v); }
- unsigned count(Value* v) { return contents.count(v); }
- size_t size() { return contents.size(); }
-
- void set(unsigned i) {
- numbers.set(i);
- }
-
- void operator=(const ValueNumberedSet& other) {
- contents = other.contents;
- numbers = other.numbers;
- }
-
- void reset(unsigned i) {
- numbers.reset(i);
- }
-
- bool test(unsigned i) {
- return numbers.test(i);
+
+namespace llvm {
+ template<> struct DenseMapInfo<uint32_t> {
+ static inline uint32_t getEmptyKey() { return ~0; }
+ static inline uint32_t getTombstoneKey() { return ~0 - 1; }
+ static unsigned getHashValue(const uint32_t& Val) { return Val * 37; }
+ static bool isPod() { return true; }
+ static bool isEqual(const uint32_t& LHS, const uint32_t& RHS) {
+ return LHS == RHS;
}
-};
+ };
}
-//===----------------------------------------------------------------------===//
-// GVN Pass
-//===----------------------------------------------------------------------===//
+namespace {
+ struct VISIBILITY_HIDDEN ValueNumberScope {
+ ValueNumberScope* parent;
+ DenseMap<uint32_t, Value*> table;
+
+ ValueNumberScope(ValueNumberScope* p) : parent(p) { }
+ };
+}
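
On the DenseMapInfo<uint32_t> specialization a few lines up: DenseMap is an open-addressed table that marks never-used and erased buckets in-band, so two key values have to be reserved and must never be inserted as real keys. A standalone sketch of the resulting invariant (the constants mirror the specialization; since ValueTable hands out numbers starting at 1, a function would need on the order of 2^32 values before colliding with them):

    #include <cstdint>

    const uint32_t EmptyKey     = ~0u;      // marks a never-used bucket
    const uint32_t TombstoneKey = ~0u - 1;  // marks an erased bucket

    // A defensive check a client could apply before using a value number
    // as a DenseMap key.
    bool isUsableValueNumber(uint32_t VN) {
      return VN != EmptyKey && VN != TombstoneKey;
    }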
namespace {
@@ -654,8 +712,7 @@
private:
ValueTable VN;
-
- DenseMap<BasicBlock*, ValueNumberedSet> availableOut;
+ DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
PhiMapType phiMap;
@@ -663,36 +720,35 @@
// This transformation requires dominator and postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetData>();
+
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
- AU.addPreserved<TargetData>();
}
// Helper functions
// FIXME: eliminate or document these better
- Value* find_leader(ValueNumberedSet& vals, uint32_t v) ;
- void val_insert(ValueNumberedSet& s, Value* v);
bool processLoad(LoadInst* L,
DenseMap<Value*, LoadInst*> &lastLoad,
SmallVectorImpl<Instruction*> &toErase);
bool processInstruction(Instruction* I,
- ValueNumberedSet& currAvail,
DenseMap<Value*, LoadInst*>& lastSeenLoad,
SmallVectorImpl<Instruction*> &toErase);
bool processNonLocalLoad(LoadInst* L,
SmallVectorImpl<Instruction*> &toErase);
+ bool processBlock(DomTreeNode* DTN);
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
DenseMap<BasicBlock*, Value*> &Phis,
bool top_level = false);
- void dump(DenseMap<BasicBlock*, Value*>& d);
+ void dump(DenseMap<uint32_t, Value*>& d);
bool iterateOnFunction(Function &F);
Value* CollapsePhi(PHINode* p);
bool isSafeReplacement(PHINode* p, Instruction* inst);
+ bool performPRE(Function& F);
+ Value* lookupNumber(BasicBlock* BB, uint32_t num);
};
char GVN::ID = 0;
@@ -704,37 +760,11 @@
static RegisterPass<GVN> X("gvn",
"Global Value Numbering");
-/// find_leader - Given a set and a value number, return the first
-/// element of the set with that value number, or 0 if no such element
-/// is present
-Value* GVN::find_leader(ValueNumberedSet& vals, uint32_t v) {
- if (!vals.test(v))
- return 0;
-
- for (ValueNumberedSet::iterator I = vals.begin(), E = vals.end();
- I != E; ++I)
- if (v == VN.lookup(*I))
- return *I;
-
- assert(0 && "No leader found, but present bit is set?");
- return 0;
-}
-
-/// val_insert - Insert a value into a set only if there is not a value
-/// with the same value number already in the set
-void GVN::val_insert(ValueNumberedSet& s, Value* v) {
- uint32_t num = VN.lookup(v);
- if (!s.test(num))
- s.insert(v);
-}
-
-void GVN::dump(DenseMap<BasicBlock*, Value*>& d) {
+void GVN::dump(DenseMap<uint32_t, Value*>& d) {
printf("{\n");
- for (DenseMap<BasicBlock*, Value*>::iterator I = d.begin(),
+ for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
E = d.end(); I != E; ++I) {
- if (I->second == MemoryDependenceAnalysis::None)
- printf("None\n");
- else
+ printf("%d\n", I->first);
I->second->dump();
}
printf("}\n");
@@ -779,6 +809,11 @@
DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB);
if (V != Phis.end() && !top_level) return V->second;
+ // If the block is unreachable, just return undef, since this path
+ // can't actually occur at runtime.
+ if (!getAnalysis<DominatorTree>().isReachableFromEntry(BB))
+ return Phis[BB] = UndefValue::get(orig->getType());
+
BasicBlock* singlePred = BB->getSinglePredecessor();
if (singlePred) {
Value *ret = GetValueForBlock(singlePred, orig, Phis);
@@ -990,20 +1025,49 @@
return deletedLoad;
}
+Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) {
+ DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
+ if (I == localAvail.end())
+ return 0;
+
+ ValueNumberScope* locals = I->second;
+
+ while (locals) {
+ DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num);
+ if (I != locals->table.end())
+ return I->second;
+ else
+ locals = locals->parent;
+ }
+
+ return 0;
+}
+
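The scope chain that makes this lookup work is built in processBlock below: each block's ValueNumberScope points at the scope of its immediate dominator, so the first hit on the walk outward is a definition that dominates the querying block. The same walk with minimal stand-in types:

    #include <cstdint>
    #include <map>

    struct Scope {
      Scope* Parent;                  // scope of the immediate dominator
      std::map<uint32_t, int> Table;  // value number -> leader ('int'
                                      // stands in for Value*)
    };

    const int* lookup(const Scope* S, uint32_t Num) {
      for (; S; S = S->Parent) {      // walk outward through dominators
        std::map<uint32_t, int>::const_iterator I = S->Table.find(Num);
        if (I != S->Table.end())
          return &I->second;          // dominates the starting block
      }
      return 0;   // unavailable here and in every dominator
    }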
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction *I, ValueNumberedSet &currAvail,
+bool GVN::processInstruction(Instruction *I,
DenseMap<Value*, LoadInst*> &lastSeenLoad,
SmallVectorImpl<Instruction*> &toErase) {
- if (LoadInst* L = dyn_cast<LoadInst>(I))
- return processLoad(L, lastSeenLoad, toErase);
+ if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+ bool changed = processLoad(L, lastSeenLoad, toErase);
+
+ if (!changed) {
+ unsigned num = VN.lookup_or_add(L);
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, L));
+ }
+
+ return changed;
+ }
+
+ uint32_t nextNum = VN.getNextUnusedValueNumber();
+ unsigned num = VN.lookup_or_add(I);
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
- if (isa<AllocationInst>(I))
+ if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
return false;
-
- unsigned num = VN.lookup_or_add(I);
+ }
// Collapse PHI nodes
if (PHINode* p = dyn_cast<PHINode>(I)) {
@@ -1017,11 +1081,18 @@
p->replaceAllUsesWith(constVal);
toErase.push_back(p);
+ } else {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
}
- // Perform value-number based elimination
- } else if (currAvail.test(num)) {
- Value* repl = find_leader(currAvail, num);
+
+ // If the number we were assigned was a brand new VN, then we don't
+ // need to do a lookup to see if the number already exists
+ // somewhere in the domtree: it can't!
+ } else if (num == nextNum) {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+ // Perform value-number based elimination
+ } else if (Value* repl = lookupNumber(I->getParent(), num)) {
// Remove it!
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
MD.removeInstruction(I);
@@ -1030,9 +1101,8 @@
I->replaceAllUsesWith(repl);
toErase.push_back(I);
return true;
- } else if (!I->isTerminator()) {
- currAvail.set(num);
- currAvail.insert(I);
+ } else {
+ localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
}
return false;
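
The num == nextNum shortcut above leans on a simple invariant: value numbers are handed out sequentially, so capturing the next unused number before lookup_or_add reveals whether the instruction received a fresh number, in which case no dominating duplicate can exist and the scope lookup is skipped. The trick in isolation (standalone sketch; strings stand in for expressions):

    #include <cstdint>
    #include <map>
    #include <string>

    static uint32_t nextValueNumber = 1;
    static std::map<std::string, uint32_t> numbering;

    uint32_t lookupOrAdd(const std::string& Key) {
      std::map<std::string, uint32_t>::iterator I = numbering.find(Key);
      if (I != numbering.end()) return I->second;
      return numbering[Key] = nextValueNumber++;
    }

    bool isFreshNumber(const std::string& Key) {
      uint32_t Next = nextValueNumber;   // capture before the lookup
      return lookupOrAdd(Key) == Next;   // fresh => nothing to eliminate
    }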
@@ -1044,6 +1114,7 @@
bool GVN::runOnFunction(Function& F) {
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(&getAnalysis<MemoryDependenceAnalysis>());
+ VN.setDomTree(&getAnalysis<DominatorTree>());
bool changed = false;
bool shouldContinue = true;
@@ -1057,72 +1128,220 @@
}
-// GVN::iterateOnFunction - Executes one iteration of GVN
-bool GVN::iterateOnFunction(Function &F) {
- // Clean out global sets from any previous functions
- VN.clear();
- availableOut.clear();
- phiMap.clear();
-
- bool changed_function = false;
-
- DominatorTree &DT = getAnalysis<DominatorTree>();
-
+bool GVN::processBlock(DomTreeNode* DTN) {
+ BasicBlock* BB = DTN->getBlock();
+
SmallVector<Instruction*, 8> toErase;
DenseMap<Value*, LoadInst*> lastSeenLoad;
- DenseMap<DomTreeNode*, size_t> numChildrenVisited;
-
- // Top-down walk of the dominator tree
- for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
- E = df_end(DT.getRootNode()); DI != E; ++DI) {
+ bool changed_function = false;
+
+ if (DTN->getIDom())
+ localAvail[BB] =
+ new ValueNumberScope(localAvail[DTN->getIDom()->getBlock()]);
+ else
+ localAvail[BB] = new ValueNumberScope(0);
+
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE;) {
+ changed_function |= processInstruction(BI, lastSeenLoad, toErase);
+ if (toErase.empty()) {
+ ++BI;
+ continue;
+ }
- // Get the set to update for this block
- ValueNumberedSet& currAvail = availableOut[DI->getBlock()];
- lastSeenLoad.clear();
+ // If we need some instructions deleted, do it now.
+ NumGVNInstr += toErase.size();
+
+ // Avoid iterator invalidation.
+ bool AtStart = BI == BB->begin();
+ if (!AtStart)
+ --BI;
+
+ for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
- BasicBlock* BB = DI->getBlock();
+ if (AtStart)
+ BI = BB->begin();
+ else
+ ++BI;
+
+ toErase.clear();
+ }
- // A block inherits AVAIL_OUT from its dominator
- if (DI->getIDom() != 0) {
- currAvail = availableOut[DI->getIDom()->getBlock()];
+ return changed_function;
+}
+
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
+bool GVN::performPRE(Function& F) {
+ bool changed = false;
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock* CurrentBlock = *DI;
+
+ // Nothing to PRE in the entry block.
+ if (CurrentBlock == &F.getEntryBlock()) continue;
+
+ for (BasicBlock::iterator BI = CurrentBlock->begin(),
+ BE = CurrentBlock->end(); BI != BE; ) {
+ if (isa<AllocationInst>(BI) || isa<TerminatorInst>(BI) ||
+ isa<PHINode>(BI) || BI->mayReadFromMemory() ||
+ BI->mayWriteToMemory()) {
+ BI++;
+ continue;
+ }
+
+ uint32_t valno = VN.lookup(BI);
- numChildrenVisited[DI->getIDom()]++;
+ // Look for the predecessors for PRE opportunities. We're
+ // only trying to solve the basic diamond case, where
+ // a value is computed in the successor and one predecessor,
+ // but not the other. We also explicitly disallow cases
+ // where the successor is its own predecessor, because they're
+ // more complicated to get right.
+ unsigned numWith = 0;
+ unsigned numWithout = 0;
+ BasicBlock* PREPred = 0;
+ DenseMap<BasicBlock*, Value*> predMap;
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ // We're not interested in PRE where the block is its
+ // own predecessor, or in blocks with predecessors
+ // that are not reachable.
+ if (*PI == CurrentBlock) {
+ numWithout = 2;
+ break;
+ } else if (!localAvail.count(*PI)) {
+ numWithout = 2;
+ break;
+ }
+
+ DenseMap<uint32_t, Value*>::iterator predV =
+ localAvail[*PI]->table.find(valno);
+ if (predV == localAvail[*PI]->table.end()) {
+ PREPred = *PI;
+ numWithout++;
+ } else if (predV->second == BI) {
+ numWithout = 2;
+ } else {
+ predMap[*PI] = predV->second;
+ numWith++;
+ }
+ }
- if (numChildrenVisited[DI->getIDom()] == DI->getIDom()->getNumChildren()) {
- availableOut.erase(DI->getIDom()->getBlock());
- numChildrenVisited.erase(DI->getIDom());
+ // Don't do PRE when it might increase code size, i.e. when
+ // we would need to insert instructions in more than one pred.
+ if (numWithout != 1 || numWith == 0) {
+ BI++;
+ continue;
}
- }
-
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE;) {
- changed_function |= processInstruction(BI, currAvail,
- lastSeenLoad, toErase);
- if (toErase.empty()) {
- ++BI;
+
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned succNum = 0;
+ for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
+ i != e; ++i)
+ if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
+ succNum = i;
+ break;
+ }
+
+ if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
+ changed = true;
+ BI++;
continue;
}
- // If we need some instructions deleted, do it now.
- NumGVNInstr += toErase.size();
+ // Instantiate the expression in the predecessor that lacked it.
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ Instruction* PREInstr = BI->clone();
+ bool success = true;
+ for (unsigned i = 0; i < BI->getNumOperands(); ++i) {
+ Value* op = BI->getOperand(i);
+ if (isa<Argument>(op) || isa<Constant>(op) || isa<GlobalValue>(op))
+ PREInstr->setOperand(i, op);
+ else if (!lookupNumber(PREPred, VN.lookup(op))) {
+ success = false;
+ break;
+ } else
+ PREInstr->setOperand(i, lookupNumber(PREPred, VN.lookup(op)));
+ }
- // Avoid iterator invalidation.
- bool AtStart = BI == BB->begin();
- if (!AtStart)
- --BI;
-
- for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
- E = toErase.end(); I != E; ++I)
- (*I)->eraseFromParent();
-
- if (AtStart)
- BI = BB->begin();
- else
- ++BI;
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success) {
+ delete PREInstr;
+ BI++;
+ continue;
+ }
+
+ PREInstr->insertBefore(PREPred->getTerminator());
+ PREInstr->setName(BI->getName() + ".pre");
+ predMap[PREPred] = PREInstr;
+ VN.add(PREInstr, valno);
+ NumGVNPRE++;
+
+ // Update the availability map to include the new instruction.
+ localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
+
+ // Create a PHI to make the value available in this block.
+ PHINode* Phi = PHINode::Create(BI->getType(),
+ BI->getName() + ".pre-phi",
+ CurrentBlock->begin());
+ for (pred_iterator PI = pred_begin(CurrentBlock),
+ PE = pred_end(CurrentBlock); PI != PE; ++PI)
+ Phi->addIncoming(predMap[*PI], *PI);
- toErase.clear();
+ VN.add(Phi, valno);
+ localAvail[CurrentBlock]->table[valno] = Phi;
+
+ BI->replaceAllUsesWith(Phi);
+ VN.erase(BI);
+
+ Instruction* erase = BI;
+ BI++;
+ erase->eraseFromParent();
+
+ changed = true;
}
}
- return changed_function;
+ for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
+ I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
+ SplitCriticalEdge(I->first, I->second, this);
+
+ return changed;
+}
+
+// GVN::iterateOnFunction - Executes one iteration of GVN
+bool GVN::iterateOnFunction(Function &F) {
+ // Clean out global sets from any previous functions
+ VN.clear();
+ phiMap.clear();
+
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
+ delete I->second;
+ localAvail.clear();
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
+ // Top-down walk of the dominator tree
+ bool changed = false;
+ for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
+ DE = df_end(DT.getRootNode()); DI != DE; ++DI)
+ changed |= processBlock(*DI);
+
+ if (EnablePRE)
+ changed |= performPRE(F);
+
+ return changed;
}
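
To make the shape performPRE targets concrete, here is an illustrative source function (not from the patch): the expression is available along one path into the join block and missing along the other, so the pass clones it into the predecessor that lacked it and merges the copies with a phi.

    int f(int a, int b, bool p) {
      int t;
      if (p)
        t = a + b;         // 'a + b' is available from this predecessor...
      else
        t = a - b;         // ...but not this one: PRE inserts 'a + b' here
      return t + (a + b);  // ...and the join block's 'a + b' becomes a phi
    }

If the edge from the enriched predecessor into the join block is critical, the code above defers instead: it queues the edge in toSplit and retries on a later iteration, once SplitCriticalEdge has given the insertion a safe home.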
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/GVNPRE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/GVNPRE.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/GVNPRE.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/GVNPRE.cpp Sun Jul 6 15:45:41 2008
@@ -16,6 +16,9 @@
// live ranges, and should be used with caution on platforms that are very
// sensitive to register pressure.
//
+// Note that this pass does the value numbering itself; it does not use the
+// ValueNumbering analysis passes.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "gvnpre"
@@ -45,6 +48,8 @@
// ValueTable Class
//===----------------------------------------------------------------------===//
+namespace {
+
/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
@@ -123,6 +128,7 @@
}
};
+}
namespace {
class VISIBILITY_HIDDEN ValueTable {
@@ -596,6 +602,8 @@
return nextValueNumber;
}
+namespace {
+
//===----------------------------------------------------------------------===//
// ValueNumberedSet Class
//===----------------------------------------------------------------------===//
@@ -652,6 +660,8 @@
}
};
+}
+
//===----------------------------------------------------------------------===//
// GVNPRE Pass
//===----------------------------------------------------------------------===//
@@ -797,7 +807,7 @@
if (newOp1 != U->getOperand(0)) {
Instruction* newVal = 0;
if (CastInst* C = dyn_cast<CastInst>(U))
- newVal = CastInst::create(C->getOpcode(),
+ newVal = CastInst::Create(C->getOpcode(),
newOp1, C->getType(),
C->getName()+".expr");
@@ -840,11 +850,11 @@
if (newOp1 != U->getOperand(0) || newOp2 != U->getOperand(1)) {
Instruction* newVal = 0;
if (BinaryOperator* BO = dyn_cast<BinaryOperator>(U))
- newVal = BinaryOperator::create(BO->getOpcode(),
+ newVal = BinaryOperator::Create(BO->getOpcode(),
newOp1, newOp2,
BO->getName()+".expr");
else if (CmpInst* C = dyn_cast<CmpInst>(U))
- newVal = CmpInst::create(C->getOpcode(),
+ newVal = CmpInst::Create(C->getOpcode(),
C->getPredicate(),
newOp1, newOp2,
C->getName()+".expr");
@@ -902,12 +912,13 @@
Instruction* newVal = 0;
if (ShuffleVectorInst* S = dyn_cast<ShuffleVectorInst>(U))
newVal = new ShuffleVectorInst(newOp1, newOp2, newOp3,
- S->getName()+".expr");
+ S->getName() + ".expr");
else if (InsertElementInst* I = dyn_cast<InsertElementInst>(U))
newVal = InsertElementInst::Create(newOp1, newOp2, newOp3,
- I->getName()+".expr");
+ I->getName() + ".expr");
else if (SelectInst* I = dyn_cast<SelectInst>(U))
- newVal = SelectInst::Create(newOp1, newOp2, newOp3, I->getName()+".expr");
+ newVal = SelectInst::Create(newOp1, newOp2, newOp3,
+ I->getName() + ".expr");
uint32_t v = VN.lookup_or_add(newVal);
@@ -1657,11 +1668,11 @@
Value* newVal = 0;
if (BinaryOperator* BO = dyn_cast<BinaryOperator>(U))
- newVal = BinaryOperator::create(BO->getOpcode(), s1, s2,
+ newVal = BinaryOperator::Create(BO->getOpcode(), s1, s2,
BO->getName()+".gvnpre",
(*PI)->getTerminator());
else if (CmpInst* C = dyn_cast<CmpInst>(U))
- newVal = CmpInst::create(C->getOpcode(), C->getPredicate(), s1, s2,
+ newVal = CmpInst::Create(C->getOpcode(), C->getPredicate(), s1, s2,
C->getName()+".gvnpre",
(*PI)->getTerminator());
else if (ShuffleVectorInst* S = dyn_cast<ShuffleVectorInst>(U))
@@ -1677,7 +1688,7 @@
newVal = SelectInst::Create(s1, s2, s3, S->getName()+".gvnpre",
(*PI)->getTerminator());
else if (CastInst* C = dyn_cast<CastInst>(U))
- newVal = CastInst::create(C->getOpcode(), s1, C->getType(),
+ newVal = CastInst::Create(C->getOpcode(), s1, C->getType(),
C->getName()+".gvnpre",
(*PI)->getTerminator());
else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(U))
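
The namespace { } additions in this file give the helper classes internal linkage, keeping their symbols local to the translation unit and out of the way of identically-named helpers in other passes; VISIBILITY_HIDDEN serves the related purpose of hiding symbols across shared-object boundaries. The pattern in isolation:

    namespace {            // anonymous: contents are private to this
      struct Helper {      // .cpp file, much as 'static' would be for
        int cached;        // a single function or object
      };
    }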
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/IndVarSimplify.cpp Sun Jul 6 15:45:41 2008
@@ -94,11 +94,12 @@
void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
};
-
- char IndVarSimplify::ID = 0;
- RegisterPass<IndVarSimplify> X("indvars", "Canonicalize Induction Variables");
}
+char IndVarSimplify::ID = 0;
+static RegisterPass<IndVarSimplify>
+X("indvars", "Canonicalize Induction Variables");
+
LoopPass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
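
The registration change above (hoisting char IndVarSimplify::ID out of the anonymous namespace and making the RegisterPass object static) is the standard pass-plumbing idiom of this era. A sketch with a hypothetical pass name, assuming llvm/Pass.h pulls in the registration machinery as it did at the time:

    #include "llvm/Pass.h"
    #include "llvm/Support/Compiler.h"
    using namespace llvm;

    namespace {
      struct VISIBILITY_HIDDEN MyPass : public FunctionPass {
        static char ID;   // the address, not the value, identifies the pass
        MyPass() : FunctionPass((intptr_t)&ID) {}
        virtual bool runOnFunction(Function &F) { return false; }
      };
    }

    char MyPass::ID = 0;
    static RegisterPass<MyPass> X("mypass", "Example pass");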
@@ -150,7 +151,7 @@
NewPhi->addIncoming(Constant::getNullValue(NewPhi->getType()), Preheader);
// Create the new add instruction.
- Value *NewAdd = BinaryOperator::createAdd(NewPhi, AddedVal,
+ Value *NewAdd = BinaryOperator::CreateAdd(NewPhi, AddedVal,
GEPI->getName()+".rec", GEPI);
NewPhi->addIncoming(NewAdd, PN->getIncomingBlock(BackedgeIdx));
@@ -318,8 +319,7 @@
BlockToInsertInto = ExitBlocks[0];
else
BlockToInsertInto = Preheader;
- BasicBlock::iterator InsertPt = BlockToInsertInto->begin();
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+ BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI();
bool HasConstantItCount = isa<SCEVConstant>(SE->getIterationCount(L));
@@ -522,11 +522,7 @@
DOUT << "INDVARS: New CanIV: " << *IndVar;
if (!isa<SCEVCouldNotCompute>(IterationCount)) {
- if (IterationCount->getType()->getPrimitiveSizeInBits() <
- LargestType->getPrimitiveSizeInBits())
- IterationCount = SE->getZeroExtendExpr(IterationCount, LargestType);
- else if (IterationCount->getType() != LargestType)
- IterationCount = SE->getTruncateExpr(IterationCount, LargestType);
+ IterationCount = SE->getTruncateOrZeroExtend(IterationCount, LargestType);
if (Instruction *DI = LinearFunctionTestReplace(L, IterationCount,Rewriter))
DeadInsts.insert(DI);
}
@@ -534,8 +530,7 @@
// Now that we have a canonical induction variable, we can rewrite any
// recurrences in terms of the induction variable. Start with the auxiliary
// induction variables, and recursively rewrite any of their uses.
- BasicBlock::iterator InsertPt = Header->begin();
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+ BasicBlock::iterator InsertPt = Header->getFirstNonPHI();
// If there were induction variables of other sizes, cast the primary
// induction variable to the right size for them, avoiding the need for the
@@ -579,9 +574,10 @@
#if 0
// Now replace all derived expressions in the loop body with simpler
// expressions.
- for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i)
- if (LI->getLoopFor(L->getBlocks()[i]) == L) { // Not in a subloop...
- BasicBlock *BB = L->getBlocks()[i];
+ for (LoopInfo::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (LI->getLoopFor(BB) == L) { // Not in a subloop...
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (I->getType()->isInteger() && // Is an integer instruction
!I->use_empty() &&
@@ -598,6 +594,7 @@
}
}
}
+ }
#endif
DeleteTriviallyDeadInstructions(DeadInsts);
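
On the getTruncateOrZeroExtend change above: the helper folds the removed zext/trunc branching into a single call. A standalone arithmetic sketch of the assumed semantics (zero-extend when widening, truncate when narrowing, identity when the widths already match); the real helper operates on SCEV expressions, not raw integers:

    #include <cstdint>

    uint64_t truncateOrZeroExtend(uint64_t Val, unsigned SrcBits,
                                  unsigned DstBits) {
      if (SrcBits < 64)
        Val &= (1ULL << SrcBits) - 1;   // the value occupies SrcBits
      if (DstBits < SrcBits && DstBits < 64)
        Val &= (1ULL << DstBits) - 1;   // narrowing: truncate
      return Val;   // widening: zero-extension is a no-op in a uint64_t
    }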
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/InstructionCombining.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/InstructionCombining.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/InstructionCombining.cpp Sun Jul 6 15:45:41 2008
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
-// instructions. This pass does not modify the CFG This pass is where algebraic
-// simplification happens.
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
//
// This pass combines things like:
// %Y = add i32 %X, 1
@@ -40,6 +40,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -121,8 +122,8 @@
/// the work lists because they might get more simplified now.
///
void AddUsesToWorkList(Instruction &I) {
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i)))
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
AddToWorkList(Op);
}
@@ -135,11 +136,11 @@
Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
Value *R = I.getOperand(op);
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i)) {
AddToWorkList(Op);
// Set the operand to undef to drop the use.
- I.setOperand(i, UndefValue::get(Op->getType()));
+ *i = UndefValue::get(Op->getType());
}
return R;
@@ -185,6 +186,8 @@
Instruction *visitAShr(BinaryOperator &I);
Instruction *visitLShr(BinaryOperator &I);
Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
Instruction *visitFCmpInst(FCmpInst &I);
Instruction *visitICmpInst(ICmpInst &I);
Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
@@ -206,8 +209,8 @@
Instruction *visitSExt(SExtInst &CI);
Instruction *visitFPTrunc(FPTruncInst &CI);
Instruction *visitFPExt(CastInst &CI);
- Instruction *visitFPToUI(CastInst &CI);
- Instruction *visitFPToSI(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
Instruction *visitUIToFP(CastInst &CI);
Instruction *visitSIToFP(CastInst &CI);
Instruction *visitPtrToInt(CastInst &CI);
@@ -229,6 +232,7 @@
Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
// visitInstruction - Specify what to return for unhandled instructions...
Instruction *visitInstruction(Instruction &I) { return 0; }
@@ -239,6 +243,7 @@
Instruction *transformCallThroughTrampoline(CallSite CS);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
@@ -263,7 +268,7 @@
if (Constant *CV = dyn_cast<Constant>(V))
return ConstantExpr::getCast(opc, CV, Ty);
- Instruction *C = CastInst::create(opc, V, Ty, V->getName(), &Pos);
+ Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
AddToWorkList(C);
return C;
}
@@ -320,6 +325,19 @@
I.eraseFromParent();
return 0; // Don't do anything with FI
}
+
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
private:
/// InsertOperandCastBefore - This inserts a cast of V to DestTy before the
@@ -370,24 +388,24 @@
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
- void ComputeMaskedBits(Value *V, const APInt &Mask, APInt& KnownZero,
- APInt& KnownOne, unsigned Depth = 0);
- bool MaskedValueIsZero(Value *V, const APInt& Mask, unsigned Depth = 0);
bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
unsigned CastOpc,
int &NumCastsRemoved);
unsigned GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign = 0);
- };
- char InstCombiner::ID = 0;
- RegisterPass<InstCombiner> X("instcombine", "Combine redundant instructions");
+ };
}
+char InstCombiner::ID = 0;
+static RegisterPass<InstCombiner>
+X("instcombine", "Combine redundant instructions");
+
// getComplexity: Assign a complexity or rank value to LLVM Values...
// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static unsigned getComplexity(Value *V) {
@@ -511,7 +529,7 @@
// Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
- Instruction *New = BinaryOperator::create(Opcode, Op->getOperand(0),
+ Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
Op1->getOperand(0),
Op1->getName(), &I);
AddToWorkList(New);
@@ -544,6 +562,11 @@
// Constants can be considered to be negated values if they can be folded.
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantExpr::getNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isInteger())
+ return ConstantExpr::getNeg(C);
+
return 0;
}
@@ -592,10 +615,10 @@
/// getOpcode - If this is an Instruction or a ConstantExpr, return the
/// opcode value. Otherwise return UserOp1.
-static unsigned getOpcode(User *U) {
- if (Instruction *I = dyn_cast<Instruction>(U))
+static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getOpcode();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U))
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
return CE->getOpcode();
// Use UserOp1 to mean there's no opcode.
return Instruction::UserOp1;
@@ -650,457 +673,6 @@
return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
}
-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
-/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
-/// we cannot optimize based on the assumption that it is zero without changing
-/// it to be an explicit zero. If we don't change it to zero, other code could
-/// optimized based on the contradictory assumption that it is non-zero.
-/// Because instcombine aggressively folds operations with undef args anyway,
-/// this won't lose us code quality.
-void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth) {
- assert(V && "No Value?");
- assert(Depth <= 6 && "Limit Search Depth");
- uint32_t BitWidth = Mask.getBitWidth();
- assert((V->getType()->isInteger() || isa<PointerType>(V->getType())) &&
- "Not integer or pointer type!");
- assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) &&
- (!isa<IntegerType>(V->getType()) ||
- V->getType()->getPrimitiveSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "V, Mask, KnownOne and KnownZero should have same BitWidth");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue() & Mask;
- KnownZero = ~KnownOne & Mask;
- return;
- }
- // Null is all-zeros.
- if (isa<ConstantPointerNull>(V)) {
- KnownOne.clear();
- KnownZero = Mask;
- return;
- }
- // The address of an aligned GlobalValue has trailing zeros.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized())
- Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
- if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
- else
- KnownZero.clear();
- KnownOne.clear();
- return;
- }
-
- if (Depth == 6 || Mask == 0)
- return; // Limit search depth.
-
- User *I = dyn_cast<User>(V);
- if (!I) return;
-
- KnownZero.clear(); KnownOne.clear(); // Don't know anything.
- APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
-
- switch (getOpcode(I)) {
- default: break;
- case Instruction::And: {
- // If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- APInt Mask2(Mask & ~KnownZero);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne &= KnownOne2;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero |= KnownZero2;
- return;
- }
- case Instruction::Or: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- APInt Mask2(Mask & ~KnownOne);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero &= KnownZero2;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne |= KnownOne2;
- return;
- }
- case Instruction::Xor: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
- KnownZero = KnownZeroOut;
- return;
- }
- case Instruction::Mul: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // If low bits are zero in either operand, output low known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- KnownOne.clear();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- TrailZ = std::min(TrailZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
- KnownZero &= Mask;
- return;
- }
- case Instruction::Select:
- ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
- return;
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- return; // Can't work with floating point.
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- // We can't handle these if we don't know the pointer size.
- if (!TD) return;
- // Fall through and handle them the same as zext/trunc.
- case Instruction::ZExt:
- case Instruction::Trunc: {
- // All these have integer operands
- const Type *SrcTy = I->getOperand(0)->getType();
- uint32_t SrcBitWidth = TD ?
- TD->getTypeSizeInBits(SrcTy) :
- SrcTy->getPrimitiveSizeInBits();
- APInt MaskIn(Mask);
- MaskIn.zextOrTrunc(SrcBitWidth);
- KnownZero.zextOrTrunc(SrcBitWidth);
- KnownOne.zextOrTrunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
- KnownZero.zextOrTrunc(BitWidth);
- KnownOne.zextOrTrunc(BitWidth);
- // Any top bits are known to be zero.
- if (BitWidth > SrcBitWidth)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
- }
- case Instruction::BitCast: {
- const Type *SrcTy = I->getOperand(0)->getType();
- if (SrcTy->isInteger() || isa<PointerType>(SrcTy)) {
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
- return;
- }
- break;
- }
- case Instruction::SExt: {
- // Compute the bits in the result that are not present in the input.
- const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
- uint32_t SrcBitWidth = SrcTy->getBitWidth();
-
- APInt MaskIn(Mask);
- MaskIn.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
-
- // If the sign bit of the input is known set or clear, then we know the
- // top bits of the result.
- if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
- KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
- }
- case Instruction::Shl:
- // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- APInt Mask2(Mask.lshr(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero <<= ShiftAmt;
- KnownOne <<= ShiftAmt;
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- return;
- }
- break;
- case Instruction::LShr:
- // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Unsigned shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
- // high bits known zero.
- KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- return;
- }
- break;
- case Instruction::AShr:
- // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Signed shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
-
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
- KnownZero |= HighBits;
- else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
- KnownOne |= HighBits;
- return;
- }
- break;
- case Instruction::Sub: {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (!CLHS->getValue().isNegative()) {
- unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
-
- // If all of the MaskV bits are known to be zero, then we know the output
- // top bits are zero, because we now know that the output is from [0-C].
- if ((KnownZero & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
- KnownOne = APInt(BitWidth, 0); // No one bits known.
- } else {
- KnownZero = KnownOne = APInt(BitWidth, 0); // Otherwise, nothing known.
- }
- return;
- }
- }
- }
- // fall through
- case Instruction::Add: {
- // If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-0 bits are known if clear or set in both the low clear bits
- // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
- // low 3 bits clear.
- unsigned KnownZeroOut = std::min(KnownZero.countTrailingOnes(),
- KnownZero2.countTrailingOnes());
-
- KnownZero = APInt::getLowBitsSet(BitWidth, KnownZeroOut);
- KnownOne = APInt(BitWidth, 0);
- return;
- }
- case Instruction::SRem:
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue();
- if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
- APInt LowBits = RA.isStrictlyPositive() ? ((RA - 1) | RA) : ~RA;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(I->getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
-
- // The sign of a remainder is equal to the sign of the first
- // operand (zero being positive).
- if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
- KnownZero2 |= ~LowBits;
- else if (KnownOne2[BitWidth-1])
- KnownOne2 |= ~LowBits;
-
- KnownZero |= KnownZero2 & Mask;
- KnownOne |= KnownOne2 & Mask;
-
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- }
- }
- break;
- case Instruction::URem:
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue();
- if (RA.isStrictlyPositive() && RA.isPowerOf2()) {
- APInt LowBits = (RA - 1) | RA;
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- }
- } else {
- // Since the result is less than or equal to RHS, any leading zero bits
- // in RHS must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
- Depth+1);
-
- uint32_t Leaders = KnownZero2.countLeadingOnes();
- KnownZero |= APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- }
- break;
-
- case Instruction::Alloca:
- case Instruction::Malloc: {
- AllocationInst *AI = cast<AllocationInst>(V);
- unsigned Align = AI->getAlignment();
- if (Align == 0 && TD) {
- if (isa<AllocaInst>(AI))
- Align = TD->getPrefTypeAlignment(AI->getType()->getElementType());
- else if (isa<MallocInst>(AI)) {
- // Malloc returns maximally aligned memory.
- Align = TD->getABITypeAlignment(AI->getType()->getElementType());
- Align =
- std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::DoubleTy));
- Align =
- std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
- }
- }
-
- if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
- break;
- }
- case Instruction::GetElementPtr: {
- // Analyze all of the subscripts of this getelementptr instruction
- // to determine if we can prove known low zero bits.
- APInt LocalMask = APInt::getAllOnesValue(BitWidth);
- APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LocalMask,
- LocalKnownZero, LocalKnownOne, Depth+1);
- unsigned TrailZ = LocalKnownZero.countTrailingOnes();
-
- gep_type_iterator GTI = gep_type_begin(I);
- for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
- Value *Index = I->getOperand(i);
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- // Handle struct member offset arithmetic.
- if (!TD) return;
- const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
- uint64_t Offset = SL->getElementOffset(Idx);
- TrailZ = std::min(TrailZ,
- CountTrailingZeros_64(Offset));
- } else {
- // Handle array index arithmetic.
- const Type *IndexedTy = GTI.getIndexedType();
- if (!IndexedTy->isSized()) return;
- unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits();
- uint64_t TypeSize = TD ? TD->getABITypeSize(IndexedTy) : 1;
- LocalMask = APInt::getAllOnesValue(GEPOpiBits);
- LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- ComputeMaskedBits(Index, LocalMask,
- LocalKnownZero, LocalKnownOne, Depth+1);
- TrailZ = std::min(TrailZ,
- CountTrailingZeros_64(TypeSize) +
- LocalKnownZero.countTrailingOnes());
- }
- }
-
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
- break;
- }
- case Instruction::PHI: {
- PHINode *P = cast<PHINode>(I);
- // Handle the case of a simple two-predecessor recurrence PHI.
- // There's a lot more that could theoretically be done here, but
- // this is sufficient to catch some interesting cases.
- if (P->getNumIncomingValues() == 2) {
- for (unsigned i = 0; i != 2; ++i) {
- Value *L = P->getIncomingValue(i);
- Value *R = P->getIncomingValue(!i);
- User *LU = dyn_cast<User>(L);
- unsigned Opcode = LU ? getOpcode(LU) : (unsigned)Instruction::UserOp1;
- // Check for operations that have the property that if
- // both their operands have low zero bits, the result
- // will have low zero bits.
- if (Opcode == Instruction::Add ||
- Opcode == Instruction::Sub ||
- Opcode == Instruction::And ||
- Opcode == Instruction::Or ||
- Opcode == Instruction::Mul) {
- Value *LL = LU->getOperand(0);
- Value *LR = LU->getOperand(1);
- // Find a recurrence.
- if (LL == I)
- L = LR;
- else if (LR == I)
- L = LL;
- else
- break;
- // Ok, we have a PHI of the form L op= R. Check for low
- // zero bits.
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, Depth+1);
- Mask2 = APInt::getLowBitsSet(BitWidth,
- KnownZero2.countTrailingOnes());
- KnownOne2.clear();
- KnownZero2.clear();
- ComputeMaskedBits(L, Mask2, KnownZero2, KnownOne2, Depth+1);
- KnownZero = Mask &
- APInt::getLowBitsSet(BitWidth,
- KnownZero2.countTrailingOnes());
- break;
- }
- }
- }
- break;
- }
- }
-}
-
-/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
-/// this predicate to simplify operations downstream. Mask is known to be zero
-/// for bits that V cannot have.
-bool InstCombiner::MaskedValueIsZero(Value *V, const APInt& Mask,
- unsigned Depth) {
- APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, Depth);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- return (KnownZero & Mask) == Mask;
-}
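
The bodies deleted here moved wholesale into lib/Analysis/ValueTracking.cpp; note the new ValueTracking.h include and the thin forwarding wrappers added near the top of this file. The predicate's contract is unchanged. A plain-arithmetic illustration of what MaskedValueIsZero promises, runnable on its own:

    #include <cassert>
    #include <cstdint>

    int main() {
      // For V = X << 4 the low four bits are zero for every X, so a
      // bit-tracking analysis may report Mask = 0xF as known-zero.
      for (uint32_t X = 0; X != 1000; ++X) {
        uint32_t V = X << 4;
        assert((V & 0xFu) == 0);
      }
      return 0;
    }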
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
@@ -1232,7 +804,9 @@
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
switch (I->getOpcode()) {
- default: break;
+ default:
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
if (SimplifyDemandedBits(I->getOperand(1), DemandedMask,
@@ -1344,7 +918,7 @@
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
Instruction *Or =
- BinaryOperator::createOr(I->getOperand(0), I->getOperand(1),
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
InsertNewInstBefore(Or, *I);
return UpdateValueUsesWith(I, Or);
@@ -1359,7 +933,7 @@
if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
Constant *AndC = ConstantInt::get(~RHSKnownOne & DemandedMask);
Instruction *And =
- BinaryOperator::createAnd(I->getOperand(0), AndC, "tmp");
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
InsertNewInstBefore(And, *I);
return UpdateValueUsesWith(I, And);
}
@@ -1518,7 +1092,7 @@
// Turn it into OR if input bits are zero.
if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
Instruction *Or =
- BinaryOperator::createOr(I->getOperand(0), I->getOperand(1),
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
InsertNewInstBefore(Or, *I);
return UpdateValueUsesWith(I, Or);
@@ -1578,6 +1152,9 @@
LHSKnownZero, LHSKnownOne, Depth+1))
return true;
}
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -1623,7 +1200,7 @@
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedMask == 1) {
// Perform the logical shift right.
- Value *NewVal = BinaryOperator::createLShr(
+ Value *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
InsertNewInstBefore(cast<Instruction>(NewVal), *I);
return UpdateValueUsesWith(I, NewVal);
@@ -1661,10 +1238,10 @@
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (RHSKnownZero[BitWidth-ShiftAmt-1] ||
+ if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
// Perform the logical shift right.
- Value *NewVal = BinaryOperator::createLShr(
+ Value *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), SA, I->getName());
InsertNewInstBefore(cast<Instruction>(NewVal), *I);
return UpdateValueUsesWith(I, NewVal);
@@ -1677,7 +1254,7 @@
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue();
if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
- APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) | RA : ~RA;
+ APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
if (SimplifyDemandedBits(I->getOperand(0), Mask2,
LHSKnownZero, LHSKnownOne, Depth+1))
@@ -1695,11 +1272,11 @@
}
}
break;
- case Instruction::URem:
+ case Instruction::URem: {
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue();
if (RA.isPowerOf2()) {
- APInt LowBits = (RA - 1) | RA;
+ APInt LowBits = (RA - 1);
APInt Mask2 = LowBits & DemandedMask;
KnownZero |= ~LowBits & DemandedMask;
if (SimplifyDemandedBits(I->getOperand(0), Mask2,
@@ -1707,17 +1284,66 @@
return true;
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
}
- } else {
- APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperand(1), AllOnes,
- KnownZero2, KnownOne2, Depth+1))
- return true;
+ }
+
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperand(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return true;
+
+ uint32_t Leaders = KnownZero2.countLeadingOnes();
+ if (SimplifyDemandedBits(I->getOperand(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return true;
- uint32_t Leaders = KnownZero2.countLeadingOnes();
- KnownZero |= APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ Leaders = std::max(Leaders,
+ KnownZero2.countLeadingOnes());
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ InsertNewInstBefore(NewVal, *I);
+ return UpdateValueUsesWith(I, NewVal);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ }
}
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
break;
}
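
A concrete instance of the new bswap fold, as illustrative standalone code using the GCC/Clang builtin: the demanded mask below is 0xFF00, so BitWidth - NLZ - NTZ == 8 and the bswap collapses to a plain shift of its input.

    #include <cstdint>

    uint32_t demandedByte(uint32_t x) {
      // Only byte 1 of the bswap result is demanded, and that byte is
      // byte 2 of the input; the fold rewrites the bswap as (x >> 8),
      // so the whole expression becomes (x >> 16) & 0xFF.
      return (__builtin_bswap32(x) >> 8) & 0xFFu;
    }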
@@ -1966,12 +1592,12 @@
default: assert(0 && "Case stmts out of sync!");
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse2_sub_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::createSub(LHS, RHS,
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS,
II->getName()), *II);
break;
case Intrinsic::x86_sse_mul_ss:
case Intrinsic::x86_sse2_mul_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::createMul(LHS, RHS,
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS,
II->getName()), *II);
break;
}
@@ -1996,21 +1622,6 @@
return MadeChange ? I : 0;
}
-/// @returns true if the specified compare predicate is
-/// true when both operands are equal...
-/// @brief Determine if the icmp Predicate is true when both operands are equal
-static bool isTrueWhenEqual(ICmpInst::Predicate pred) {
- return pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_UGE ||
- pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE ||
- pred == ICmpInst::ICMP_SLE;
-}
-
-/// @returns true if the specified compare instruction is
-/// true when both operands are equal...
-/// @brief Determine if the ICmpInst returns true when both operands are equal
-static bool isTrueWhenEqual(ICmpInst &ICI) {
- return isTrueWhenEqual(ICI.getPredicate());
-}
/// AssociativeOpt - Perform an optimization on an associative operator. This
/// function is designed to check a chain of associative operators for a
@@ -2021,7 +1632,7 @@
/// 'shouldApply' and 'apply' methods.
///
template<typename Functor>
-Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
unsigned Opcode = Root.getOpcode();
Value *LHS = Root.getOperand(0);
@@ -2044,8 +1655,6 @@
// If the functor wants to apply the optimization to the RHS of LHSI,
// reassociate the expression from ((? op A) op B) to (? op (A op B))
if (ShouldApply) {
- BasicBlock *BB = Root.getParent();
-
// Now all of the instructions are in the current basic block, go ahead
// and perform the reassociation.
Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));
@@ -2061,9 +1670,8 @@
}
Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI
TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root
- TmpLHSI->getParent()->getInstList().remove(TmpLHSI);
BasicBlock::iterator ARI = &Root; ++ARI;
- BB->getInstList().insert(ARI, TmpLHSI); // Move TmpLHSI to after Root
+ TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root
ARI = Root;
// Now propagate the ExtraOperand down the chain of instructions until we
@@ -2072,8 +1680,7 @@
Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
// Move the instruction to immediately before the chain we are
// constructing to avoid breaking dominance properties.
- NextLHSI->getParent()->getInstList().remove(NextLHSI);
- BB->getInstList().insert(ARI, NextLHSI);
+ NextLHSI->moveBefore(ARI);
ARI = NextLHSI;
Value *NextOp = NextLHSI->getOperand(1);
@@ -2092,6 +1699,7 @@
return 0;
}
+namespace {
// AddRHS - Implements: X + X --> X << 1
struct AddRHS {
@@ -2099,8 +1707,8 @@
AddRHS(Value *rhs) : RHS(rhs) {}
bool shouldApply(Value *LHS) const { return LHS == RHS; }
Instruction *apply(BinaryOperator &Add) const {
- return BinaryOperator::createShl(Add.getOperand(0),
- ConstantInt::get(Add.getType(), 1));
+ return BinaryOperator::CreateShl(Add.getOperand(0),
+ ConstantInt::get(Add.getType(), 1));
}
};
@@ -2115,17 +1723,19 @@
ConstantExpr::getAnd(C1, C2)->isNullValue();
}
Instruction *apply(BinaryOperator &Add) const {
- return BinaryOperator::createOr(Add.getOperand(0), Add.getOperand(1));
+ return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
}
};
+}
+
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner *IC) {
if (CastInst *CI = dyn_cast<CastInst>(&I)) {
if (Constant *SOC = dyn_cast<Constant>(SO))
return ConstantExpr::getCast(CI->getOpcode(), SOC, I.getType());
- return IC->InsertNewInstBefore(CastInst::create(
+ return IC->InsertNewInstBefore(CastInst::Create(
CI->getOpcode(), SO, I.getType(), SO->getName() + ".cast"), I);
}
@@ -2144,9 +1754,9 @@
std::swap(Op0, Op1);
Instruction *New;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- New = BinaryOperator::create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
+ New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- New = CmpInst::create(CI->getOpcode(), CI->getPredicate(), Op0, Op1,
+ New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1,
SO->getName()+".cmp");
else {
assert(0 && "Unknown binary instruction type!");
@@ -2232,11 +1842,11 @@
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- InV = BinaryOperator::create(BO->getOpcode(),
+ InV = BinaryOperator::Create(BO->getOpcode(),
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = CmpInst::create(CI->getOpcode(),
+ InV = CmpInst::Create(CI->getOpcode(),
CI->getPredicate(),
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
@@ -2256,7 +1866,7 @@
InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = CastInst::create(CI->getOpcode(), PN->getIncomingValue(i),
+ InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
I.getType(), "phitmp",
NonConstBB->getTerminator());
AddToWorkList(cast<Instruction>(InV));
@@ -2268,42 +1878,28 @@
}
-/// CannotBeNegativeZero - Return true if we can prove that the specified FP
-/// value is never equal to -0.0.
-///
-/// Note that this function will need to be revisited when we support nondefault
-/// rounding modes!
-///
-static bool CannotBeNegativeZero(const Value *V) {
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
- return !CFP->getValueAPF().isNegZero();
-
- // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- if (I->getOpcode() == Instruction::Add &&
- isa<ConstantFP>(I->getOperand(1)) &&
- cast<ConstantFP>(I->getOperand(1))->isNullValue())
- return true;
-
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::sqrt)
- return CannotBeNegativeZero(II->getOperand(1));
-
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (const Function *F = CI->getCalledFunction()) {
- if (F->isDeclaration()) {
- switch (F->getNameLen()) {
- case 3: // abs(x) != -0.0
- if (!strcmp(F->getNameStart(), "abs")) return true;
- break;
- case 4: // abs[lf](x) != -0.0
- if (!strcmp(F->getNameStart(), "absf")) return true;
- if (!strcmp(F->getNameStart(), "absl")) return true;
- break;
- }
- }
- }
- }
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+  // Add has the property that adding any two 2's complement numbers can only
+  // have one carry bit, which can change the sign. As such, if LHS and RHS
+  // each have at least two sign bits, we know that the addition of the two
+  // values will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+  // If one of the operands only has one non-zero bit, and if the other
+  // operand has a known-zero bit in a more significant place than it (not
+  // including the sign bit), the ripple may go up to and fill the zero, but
+  // won't change the sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
return false;
}
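
To see why two sign bits suffice: such an i8 value lies in [-64, 63], so the sum of two of them lies in [-128, 126] and still fits. A hedged i8 illustration (editor's helpers, not patch code):

#include <cstdint>

bool hasTwoSignBits(int8_t V) {
  // Bits 7 and 6 agree exactly when the sign bit is duplicated at least once.
  return ((V >> 7) & 1) == ((V >> 6) & 1);
}

// Conservative stand-in for WillNotOverflowSignedAdd, specialized to i8.
bool addWontOverflowSigned(int8_t A, int8_t B) {
  return hasTwoSignBits(A) && hasTwoSignBits(B);
}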
@@ -2333,7 +1929,7 @@
const APInt& Val = CI->getValue();
uint32_t BitWidth = Val.getBitWidth();
if (Val == APInt::getSignBit(BitWidth))
- return BinaryOperator::createXor(LHS, RHS);
+ return BinaryOperator::CreateXor(LHS, RHS);
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
@@ -2378,9 +1974,9 @@
} while (Size >= 1);
// FIXME: This shouldn't be necessary. When the backends can handle types
- // with funny bit widths then this whole cascade of if statements should
- // be removed. It is just here to get the size of the "middle" type back
- // up to something that the back ends can handle.
+ // with funny bit widths then this switch statement should be removed. It
+ // is just here to get the size of the "middle" type back up to something
+ // that the back ends can handle.
const Type *MiddleType = 0;
switch (Size) {
default: break;
@@ -2396,8 +1992,11 @@
}
}
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(LHS, RHS);
+
// X + X --> X << 1
- if (I.getType()->isInteger() && I.getType() != Type::Int1Ty) {
+ if (I.getType()->isInteger()) {
if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result;
if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
@@ -2417,35 +2016,35 @@
if (Value *LHSV = dyn_castNegVal(LHS)) {
if (LHS->getType()->isIntOrIntVector()) {
if (Value *RHSV = dyn_castNegVal(RHS)) {
- Instruction *NewAdd = BinaryOperator::createAdd(LHSV, RHSV, "sum");
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum");
InsertNewInstBefore(NewAdd, I);
- return BinaryOperator::createNeg(NewAdd);
+ return BinaryOperator::CreateNeg(NewAdd);
}
}
- return BinaryOperator::createSub(RHS, LHSV);
+ return BinaryOperator::CreateSub(RHS, LHSV);
}
// A + -B --> A - B
if (!isa<Constant>(RHS))
if (Value *V = dyn_castNegVal(RHS))
- return BinaryOperator::createSub(LHS, V);
+ return BinaryOperator::CreateSub(LHS, V);
ConstantInt *C2;
if (Value *X = dyn_castFoldableMul(LHS, C2)) {
if (X == RHS) // X*C + X --> X * (C+1)
- return BinaryOperator::createMul(RHS, AddOne(C2));
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::createMul(X, Add(C1, C2));
+ return BinaryOperator::CreateMul(X, Add(C1, C2));
}
// X + X*C --> X * (C+1)
if (dyn_castFoldableMul(RHS, C2) == LHS)
- return BinaryOperator::createMul(LHS, AddOne(C2));
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
// X + ~X --> -1 since ~X = -X-1
if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS)
@@ -2456,6 +2055,23 @@
if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
return R;
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
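
The no-common-bits test just added is the classic byte-packing identity: if every bit position is known zero in at least one operand, no column of the addition can carry. A plain-integer restatement (editor's sketch, uint32_t in place of the APInt masks):

#include <cstdint>

bool addIsOr(uint32_t AKnownZero, uint32_t BKnownZero) {
  return (AKnownZero | BKnownZero) == ~0u;  // every bit zero in some operand
}
// e.g. for A = x & 0xFF00 and B = y & 0x00FF this holds, so A + B == A | B.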
// W*X + Y*Z --> W * (X+Z) iff W == Y
if (I.getType()->isIntOrIntVector()) {
@@ -2474,9 +2090,9 @@
}
if (W == Y) {
- Value *NewAdd = InsertNewInstBefore(BinaryOperator::createAdd(X, Z,
+ Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z,
LHS->getName()), I);
- return BinaryOperator::createMul(W, NewAdd);
+ return BinaryOperator::CreateMul(W, NewAdd);
}
}
}
@@ -2484,7 +2100,7 @@
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
Value *X = 0;
if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
- return BinaryOperator::createSub(SubOne(CRHS), X);
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
@@ -2502,9 +2118,9 @@
if (AddRHSHighBits == AddRHSHighBitsAnd) {
// Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = InsertNewInstBefore(BinaryOperator::createAdd(X, CRHS,
+ Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS,
LHS->getName()), I);
- return BinaryOperator::createAnd(NewAdd, C2);
+ return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
}
@@ -2566,16 +2182,87 @@
if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
return ReplaceInstUsesWith(I, LHS);
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),
+ "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ // Check for (add double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+    // like (double)(x & 1234) + 4.0 -> (double)((x & 1234)+4), which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),
+ "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
return Changed ? &I : 0;
}
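
The safety conditions for the sitofp narrowing above can be spot-checked in plain C++. In this editor's sketch, foldIsSafe is a hypothetical helper and i32/double stand in for the IR types:

#include <cstdint>

// (double)X + C may become (double)(X + CI) only when C converts to an
// integer CI that round-trips exactly and the narrow add cannot overflow.
bool foldIsSafe(int32_t X, double C) {
  if (C < -2147483648.0 || C > 2147483647.0) return false; // outside i32
  int32_t CI = (int32_t)C;              // mirrors ConstantExpr::getFPToSI
  if ((double)CI != C) return false;    // constant must be integral
  int64_t Wide = (int64_t)X + CI;       // WillNotOverflowSignedAdd stand-in
  return Wide == (int32_t)Wide;         // then the two forms agree exactly
}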
-// isSignBit - Return true if the value represented by the constant only has the
-// highest order bit set.
-static bool isSignBit(ConstantInt *CI) {
- uint32_t NumBits = CI->getType()->getPrimitiveSizeInBits();
- return CI->getValue() == APInt::getSignBit(NumBits);
-}
-
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -2584,7 +2271,7 @@
// If this is a 'B = x-(-A)', change to B = x+A...
if (Value *V = dyn_castNegVal(Op1))
- return BinaryOperator::createAdd(Op0, V);
+ return BinaryOperator::CreateAdd(Op0, V);
if (isa<UndefValue>(Op0))
return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
@@ -2594,12 +2281,12 @@
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
// Replace (-1 - A) with (~A)...
if (C->isAllOnesValue())
- return BinaryOperator::createNot(Op1);
+ return BinaryOperator::CreateNot(Op1);
// C - ~X == X + (1+C)
Value *X = 0;
if (match(Op1, m_Not(m_Value(X))))
- return BinaryOperator::createAdd(X, AddOne(C));
+ return BinaryOperator::CreateAdd(X, AddOne(C));
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
@@ -2611,7 +2298,7 @@
if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
SI->getType()->getPrimitiveSizeInBits()-1) {
// Ok, the transformation is safe. Insert AShr.
- return BinaryOperator::create(Instruction::AShr,
+ return BinaryOperator::Create(Instruction::AShr,
SI->getOperand(0), CU, SI->getName());
}
}
@@ -2622,7 +2309,7 @@
if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
SI->getType()->getPrimitiveSizeInBits()-1) {
// Ok, the transformation is safe. Insert LShr.
- return BinaryOperator::createLShr(
+ return BinaryOperator::CreateLShr(
SI->getOperand(0), CU, SI->getName());
}
}
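
For reference, the shift swap above is sound because X >>u 31 is always 0 or 1 while X >>s 31 is always 0 or -1, and the sign bit decides both (editor's one-line 32-bit check, assuming arithmetic >> on signed values):

#include <cstdint>
int32_t negOfLogicalShift(uint32_t X) {
  return -(int32_t)(X >> 31);           // equals (int32_t)X >> 31 for all X
}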
@@ -2640,17 +2327,20 @@
return NV;
}
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(Op0, Op1);
+
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::Add &&
!Op0->getType()->isFPOrFPVector()) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::createNeg(Op1I->getOperand(1), I.getName());
+ return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::createNeg(Op1I->getOperand(0), I.getName());
+ return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName());
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::createSub(Subtract(CI1, CI2),
+ return BinaryOperator::CreateSub(Subtract(CI1, CI2),
Op1I->getOperand(0));
}
}
@@ -2667,7 +2357,7 @@
Op1I->setOperand(1, IIOp0);
// Create the new top level add instruction...
- return BinaryOperator::createAdd(Op0, Op1);
+ return BinaryOperator::CreateAdd(Op0, Op1);
}
// Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
@@ -2677,8 +2367,8 @@
Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
Value *NewNot =
- InsertNewInstBefore(BinaryOperator::createNot(OtherOp, "B.not"), I);
- return BinaryOperator::createAnd(Op0, NewNot);
+ InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I);
+ return BinaryOperator::CreateAnd(Op0, NewNot);
}
// 0 - (X sdiv C) -> (X sdiv -C)
@@ -2686,14 +2376,14 @@
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isZero())
if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
- return BinaryOperator::createSDiv(Op1I->getOperand(0),
+ return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
ConstantExpr::getNeg(DivRHS));
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
if (dyn_castFoldableMul(Op1I, C2) == Op0) {
Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
- return BinaryOperator::createMul(Op0, CP1);
+ return BinaryOperator::CreateMul(Op0, CP1);
}
// X - ((X / Y) * Y) --> X % Y
@@ -2702,9 +2392,9 @@
if (Op0 == I->getOperand(0) &&
Op1I->getOperand(1) == I->getOperand(1)) {
if (I->getOpcode() == Instruction::SDiv)
- return BinaryOperator::createSRem(Op0, Op1I->getOperand(1));
+ return BinaryOperator::CreateSRem(Op0, Op1I->getOperand(1));
if (I->getOpcode() == Instruction::UDiv)
- return BinaryOperator::createURem(Op0, Op1I->getOperand(1));
+ return BinaryOperator::CreateURem(Op0, Op1I->getOperand(1));
}
}
}
@@ -2718,18 +2408,18 @@
return ReplaceInstUsesWith(I, Op0I->getOperand(0));
} else if (Op0I->getOpcode() == Instruction::Sub) {
if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
- return BinaryOperator::createNeg(Op0I->getOperand(1), I.getName());
+ return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
}
}
ConstantInt *C1;
if (Value *X = dyn_castFoldableMul(Op0, C1)) {
if (X == Op1) // X*C - X --> X * (C-1)
- return BinaryOperator::createMul(Op1, SubOne(C1));
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::createMul(X, Subtract(C1, C2));
+ return BinaryOperator::CreateMul(X, Subtract(C1, C2));
}
return 0;
}
@@ -2758,8 +2448,7 @@
case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
- return RHS->getValue() ==
- APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
+ return RHS->getValue().isSignBit();
default:
return false;
}
@@ -2780,7 +2469,7 @@
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
if (SI->getOpcode() == Instruction::Shl)
if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
- return BinaryOperator::createMul(SI->getOperand(0),
+ return BinaryOperator::CreateMul(SI->getOperand(0),
ConstantExpr::getShl(CI, ShOp));
if (CI->isZero())
@@ -2788,11 +2477,11 @@
if (CI->equalsInt(1)) // X * 1 == X
return ReplaceInstUsesWith(I, Op0);
if (CI->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::createNeg(Op0, I.getName());
+ return BinaryOperator::CreateNeg(Op0, I.getName());
const APInt& Val = cast<ConstantInt>(CI)->getValue();
if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
- return BinaryOperator::createShl(Op0,
+ return BinaryOperator::CreateShl(Op0,
ConstantInt::get(Op0->getType(), Val.logBase2()));
}
} else if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
@@ -2809,14 +2498,14 @@
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
- isa<ConstantInt>(Op0I->getOperand(1))) {
+ isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) {
// Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
- Instruction *Add = BinaryOperator::createMul(Op0I->getOperand(0),
+ Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0),
Op1, "tmp");
InsertNewInstBefore(Add, I);
Value *C1C2 = ConstantExpr::getMul(Op1,
cast<Constant>(Op0I->getOperand(1)));
- return BinaryOperator::createAdd(Add, C1C2);
+ return BinaryOperator::CreateAdd(Add, C1C2);
}
@@ -2832,14 +2521,17 @@
if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
- return BinaryOperator::createMul(Op0v, Op1v);
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// See if we can simplify things based on how the boolean was originally
// formed.
CastInst *BoolCast = 0;
- if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(0)))
+ if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
if (CI->getOperand(0)->getType() == Type::Int1Ty)
BoolCast = CI;
if (!BoolCast)
@@ -2862,7 +2554,7 @@
SCOpTy->getPrimitiveSizeInBits()-1);
Value *V =
InsertNewInstBefore(
- BinaryOperator::create(Instruction::AShr, SCIOp0, Amt,
+ BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt,
BoolCast->getOperand(0)->getName()+
".mask"), I);
@@ -2878,7 +2570,7 @@
}
Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
- return BinaryOperator::createAnd(V, OtherOp);
+ return BinaryOperator::CreateAnd(V, OtherOp);
}
}
}
@@ -2949,6 +2641,18 @@
Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ // (sdiv X, X) --> 1 (udiv X, X) --> 1
+ if (Op0 == Op1) {
+ if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+ ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+ return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+ }
+
+ ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ return ReplaceInstUsesWith(I, CI);
+ }
+
if (Instruction *Common = commonDivTransforms(I))
return Common;
@@ -2964,7 +2668,7 @@
if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
else
- return BinaryOperator::create(I.getOpcode(), LHS->getOperand(0),
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
Multiply(RHS, LHSRHS));
}
@@ -2983,6 +2687,10 @@
if (LHS->equalsInt(0))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ // It can't be division by zero, hence it must be division by one.
+ if (I.getType() == Type::Int1Ty)
+ return ReplaceInstUsesWith(I, Op0);
+
return 0;
}
@@ -2998,7 +2706,7 @@
// if so, convert to a right shift.
if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
- return BinaryOperator::createLShr(Op0,
+ return BinaryOperator::CreateLShr(Op0,
ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
}
@@ -3012,9 +2720,9 @@
const Type *NTy = N->getType();
if (uint32_t C2 = C1.logBase2()) {
Constant *C2V = ConstantInt::get(NTy, C2);
- N = InsertNewInstBefore(BinaryOperator::createAdd(N, C2V, "tmp"), I);
+ N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
}
- return BinaryOperator::createLShr(Op0, N);
+ return BinaryOperator::CreateLShr(Op0, N);
}
}
}
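
Both udiv strengthenings in this hunk are shift identities; a plain-integer restatement (editor's sketch, assuming the combined shift amount stays below the bit width):

#include <cstdint>

uint32_t udivByPow2(uint32_t X, unsigned C) {
  return X >> C;                        // X / (1u << C)
}

uint32_t udivByShiftedPow2(uint32_t X, uint32_t C1, unsigned N) {
  unsigned C2 = __builtin_ctz(C1);      // log2(C1), C1 a power of two
  return X >> (C2 + N);                 // X / (C1 << N)
}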
@@ -3030,13 +2738,13 @@
uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
// Construct the "on true" case of the select
Constant *TC = ConstantInt::get(Op0->getType(), TSA);
- Instruction *TSI = BinaryOperator::createLShr(
+ Instruction *TSI = BinaryOperator::CreateLShr(
Op0, TC, SI->getName()+".t");
TSI = InsertNewInstBefore(TSI, I);
// Construct the "on false" case of the select
Constant *FC = ConstantInt::get(Op0->getType(), FSA);
- Instruction *FSI = BinaryOperator::createLShr(
+ Instruction *FSI = BinaryOperator::CreateLShr(
Op0, FC, SI->getName()+".f");
FSI = InsertNewInstBefore(FSI, I);
@@ -3057,11 +2765,11 @@
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// sdiv X, -1 == -X
if (RHS->isAllOnesValue())
- return BinaryOperator::createNeg(Op0);
+ return BinaryOperator::CreateNeg(Op0);
// -X/C -> X/-C
if (Value *LHSNeg = dyn_castNegVal(Op0))
- return BinaryOperator::createSDiv(LHSNeg, ConstantExpr::getNeg(RHS));
+ return BinaryOperator::CreateSDiv(LHSNeg, ConstantExpr::getNeg(RHS));
}
// If the sign bits of both operands are zero (i.e. we can prove they are
@@ -3070,7 +2778,7 @@
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
- return BinaryOperator::createUDiv(Op0, Op1, I.getName());
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
}
@@ -3187,7 +2895,7 @@
// if so, convert to a bitwise and.
if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue().isPowerOf2())
- return BinaryOperator::createAnd(Op0, SubOne(C));
+ return BinaryOperator::CreateAnd(Op0, SubOne(C));
}
if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
@@ -3196,9 +2904,9 @@
isa<ConstantInt>(RHSI->getOperand(0))) {
if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
Constant *N1 = ConstantInt::getAllOnesValue(I.getType());
- Value *Add = InsertNewInstBefore(BinaryOperator::createAdd(RHSI, N1,
+ Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
"tmp"), I);
- return BinaryOperator::createAnd(Op0, Add);
+ return BinaryOperator::CreateAnd(Op0, Add);
}
}
}
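
Likewise for urem: a power-of-two modulus reduces to a mask, which is exactly what the add-of-all-ones-then-and sequence above constructs (editor's sketch):

#include <cstdint>

uint32_t uremByPow2(uint32_t X, uint32_t P) {
  return X & (P - 1);                   // X % P, for P a power of two
}
// The (shl P, N) case is the same idea: X % (P << N) == X & ((P << N) - 1).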
@@ -3212,9 +2920,9 @@
if ((STO->getValue().isPowerOf2()) &&
(SFO->getValue().isPowerOf2())) {
Value *TrueAnd = InsertNewInstBefore(
- BinaryOperator::createAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
+ BinaryOperator::CreateAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
Value *FalseAnd = InsertNewInstBefore(
- BinaryOperator::createAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
+ BinaryOperator::CreateAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
}
}
@@ -3245,7 +2953,7 @@
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
// X srem Y -> X urem Y, iff X and Y don't have sign bit set
- return BinaryOperator::createURem(Op0, Op1, I.getName());
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
}
}
@@ -3433,10 +3141,10 @@
case Instruction::Xor:
if (Op->hasOneUse()) {
// (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- Instruction *And = BinaryOperator::createAnd(X, AndRHS);
+ Instruction *And = BinaryOperator::CreateAnd(X, AndRHS);
InsertNewInstBefore(And, TheAnd);
And->takeName(Op);
- return BinaryOperator::createXor(And, Together);
+ return BinaryOperator::CreateXor(And, Together);
}
break;
case Instruction::Or:
@@ -3445,10 +3153,10 @@
if (Op->hasOneUse() && Together != OpRHS) {
// (X | C1) & C2 --> (X | (C1&C2)) & C2
- Instruction *Or = BinaryOperator::createOr(X, Together);
+ Instruction *Or = BinaryOperator::CreateOr(X, Together);
InsertNewInstBefore(Or, TheAnd);
Or->takeName(Op);
- return BinaryOperator::createAnd(Or, AndRHS);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
}
break;
case Instruction::Add:
@@ -3476,10 +3184,10 @@
return &TheAnd;
} else {
// Pull the XOR out of the AND.
- Instruction *NewAnd = BinaryOperator::createAnd(X, AndRHS);
+ Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS);
InsertNewInstBefore(NewAnd, TheAnd);
NewAnd->takeName(Op);
- return BinaryOperator::createXor(NewAnd, AndRHS);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
}
}
}
@@ -3538,9 +3246,9 @@
// Make the argument unsigned.
Value *ShVal = Op->getOperand(0);
ShVal = InsertNewInstBefore(
- BinaryOperator::createLShr(ShVal, OpRHS,
+ BinaryOperator::CreateLShr(ShVal, OpRHS,
Op->getName()), TheAnd);
- return BinaryOperator::createAnd(ShVal, AndRHS, TheAnd.getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
}
}
break;
@@ -3574,7 +3282,7 @@
// Emit V-Lo <u Hi-Lo
Constant *NegLo = ConstantExpr::getNeg(Lo);
- Instruction *Add = BinaryOperator::createAdd(V, NegLo, V->getName()+".off");
+ Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
InsertNewInstBefore(Add, IB);
Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
@@ -3594,7 +3302,7 @@
// Emit V-Lo >u Hi-1-Lo
// Note that Hi has already had one subtracted from it, above.
ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
- Instruction *Add = BinaryOperator::createAdd(V, NegLo, V->getName()+".off");
+ Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
InsertNewInstBefore(Add, IB);
Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
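
Both emissions use the standard offset-and-compare idiom for unsigned range checks (editor's sketch, for Lo <= Hi):

#include <cstdint>

// Lo <= V && V < Hi collapses to one unsigned compare: subtracting Lo wraps
// any V below Lo to a large value above Hi - Lo.
bool inHalfOpenRange(uint32_t V, uint32_t Lo, uint32_t Hi) {
  return (V - Lo) < (Hi - Lo);
}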
@@ -3669,9 +3377,9 @@
Instruction *New;
if (isSub)
- New = BinaryOperator::createSub(LHSI->getOperand(0), RHS, "fold");
+ New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold");
else
- New = BinaryOperator::createAdd(LHSI->getOperand(0), RHS, "fold");
+ New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold");
return InsertNewInstBefore(New, I);
}
@@ -3719,19 +3427,19 @@
if (Op0I->hasOneUse()) {
if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
// Not masking anything out for the LHS, move to RHS.
- Instruction *NewRHS = BinaryOperator::createAnd(Op0RHS, AndRHS,
+ Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS,
Op0RHS->getName()+".masked");
InsertNewInstBefore(NewRHS, I);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
}
if (!isa<Constant>(Op0RHS) &&
MaskedValueIsZero(Op0RHS, NotAndRHS)) {
// Not masking anything out for the RHS, move to LHS.
- Instruction *NewLHS = BinaryOperator::createAnd(Op0LHS, AndRHS,
+ Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS,
Op0LHS->getName()+".masked");
InsertNewInstBefore(NewLHS, I);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS);
}
}
@@ -3742,9 +3450,9 @@
// ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
// ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
- return BinaryOperator::createAnd(V, AndRHS);
+ return BinaryOperator::CreateAnd(V, AndRHS);
if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
- return BinaryOperator::createAnd(V, AndRHS); // Add commutes
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
break;
case Instruction::Sub:
@@ -3752,7 +3460,7 @@
// ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
// ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
- return BinaryOperator::createAnd(V, AndRHS);
+ return BinaryOperator::CreateAnd(V, AndRHS);
break;
}
@@ -3772,14 +3480,14 @@
// into : and (cast X to T), trunc_or_bitcast(C1)&C2
// This will fold the two constants together, which may allow
// other simplifications.
- Instruction *NewCast = CastInst::createTruncOrBitCast(
+ Instruction *NewCast = CastInst::CreateTruncOrBitCast(
CastOp->getOperand(0), I.getType(),
CastOp->getName()+".shrunk");
NewCast = InsertNewInstBefore(NewCast, I);
// trunc_or_bitcast(C1)&C2
Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
C3 = ConstantExpr::getAnd(C3, AndRHS);
- return BinaryOperator::createAnd(NewCast, C3);
+ return BinaryOperator::CreateAnd(NewCast, C3);
} else if (CastOp->getOpcode() == Instruction::Or) {
// Change: and (cast (or X, C1) to T), C2
// into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
@@ -3808,10 +3516,10 @@
// (~A & ~B) == (~(A | B)) - De Morgan's Law
if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Instruction *Or = BinaryOperator::createOr(Op0NotVal, Op1NotVal,
+ Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal,
I.getName()+".demorgan");
InsertNewInstBefore(Or, I);
- return BinaryOperator::createNot(Or);
+ return BinaryOperator::CreateNot(Or);
}
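
The rewrite is De Morgan's law, easy to confirm at compile time (editor's check):

#include <cstdint>
static_assert((~0xF0F0u & ~0x0F0Fu) == ~(0xF0F0u | 0x0F0Fu),
              "~A & ~B == ~(A | B)");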
{
@@ -3823,7 +3531,7 @@
// (A|B) & ~(A&B) -> A^B
if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
}
@@ -3834,7 +3542,7 @@
// ~(A&B) & (A|B) -> A^B
if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
}
@@ -3856,9 +3564,9 @@
std::swap(A, B);
}
if (A == Op0) { // A&(A^B) -> A & ~B
- Instruction *NotB = BinaryOperator::createNot(B, "tmp");
+ Instruction *NotB = BinaryOperator::CreateNot(B, "tmp");
InsertNewInstBefore(NotB, I);
- return BinaryOperator::createAnd(A, NotB);
+ return BinaryOperator::CreateAnd(A, NotB);
}
}
}
@@ -3941,7 +3649,7 @@
case ICmpInst::ICMP_NE:
if (LHSCst == SubOne(RHSCst)) { // (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST,
+ Instruction *Add = BinaryOperator::CreateAdd(LHSVal, AddCST,
LHSVal->getName()+".off");
InsertNewInstBefore(Add, I);
return new ICmpInst(ICmpInst::ICMP_UGT, Add,
@@ -3983,8 +3691,7 @@
case ICmpInst::ICMP_UGT:
switch (RHSCC) {
default: assert(0 && "Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X > 13
- return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
@@ -4034,11 +3741,11 @@
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::createAnd(Op0C->getOperand(0),
+ Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0),
Op1C->getOperand(0),
I.getName());
InsertNewInstBefore(NewOp, I);
- return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
@@ -4049,10 +3756,10 @@
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createAnd(SI0->getOperand(0),
+ InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0),
SI1->getOperand(0),
SI0->getName()), I);
- return BinaryOperator::create(SI1->getOpcode(), NewOp,
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
@@ -4224,19 +3931,19 @@
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
- Instruction *Or = BinaryOperator::createOr(X, RHS);
+ Instruction *Or = BinaryOperator::CreateOr(X, RHS);
InsertNewInstBefore(Or, I);
Or->takeName(Op0);
- return BinaryOperator::createAnd(Or,
+ return BinaryOperator::CreateAnd(Or,
ConstantInt::get(RHS->getValue() | C1->getValue()));
}
// (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
- Instruction *Or = BinaryOperator::createOr(X, RHS);
+ Instruction *Or = BinaryOperator::CreateOr(X, RHS);
InsertNewInstBefore(Or, I);
Or->takeName(Op0);
- return BinaryOperator::createXor(Or,
+ return BinaryOperator::CreateXor(Or,
ConstantInt::get(C1->getValue() & ~RHS->getValue()));
}
@@ -4272,19 +3979,19 @@
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op1, C1->getValue())) {
- Instruction *NOr = BinaryOperator::createOr(A, Op1);
+ Instruction *NOr = BinaryOperator::CreateOr(A, Op1);
InsertNewInstBefore(NOr, I);
NOr->takeName(Op0);
- return BinaryOperator::createXor(NOr, C1);
+ return BinaryOperator::CreateXor(NOr, C1);
}
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op0, C1->getValue())) {
- Instruction *NOr = BinaryOperator::createOr(A, Op0);
+ Instruction *NOr = BinaryOperator::CreateOr(A, Op0);
InsertNewInstBefore(NOr, I);
NOr->takeName(Op1);
- return BinaryOperator::createXor(NOr, C1);
+ return BinaryOperator::CreateXor(NOr, C1);
}
// (A & C)|(B & D)
@@ -4334,8 +4041,8 @@
if (V1) {
Value *Or =
- InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I);
- return BinaryOperator::createAnd(V1, Or);
+ InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I);
+ return BinaryOperator::CreateAnd(V1, Or);
}
}
}
@@ -4347,10 +4054,10 @@
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createOr(SI0->getOperand(0),
+ InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0),
SI1->getOperand(0),
SI0->getName()), I);
- return BinaryOperator::create(SI1->getOpcode(), NewOp,
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
@@ -4368,9 +4075,9 @@
// (~A | ~B) == (~(A & B)) - De Morgan's Law
if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *And = InsertNewInstBefore(BinaryOperator::createAnd(A, B,
+ Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B,
I.getName()+".demorgan"), I);
- return BinaryOperator::createNot(And);
+ return BinaryOperator::CreateNot(And);
}
}
@@ -4422,7 +4129,7 @@
case ICmpInst::ICMP_EQ:
if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST,
+ Instruction *Add = BinaryOperator::CreateAdd(LHSVal, AddCST,
LHSVal->getName()+".off");
InsertNewInstBefore(Add, I);
AddCST = Subtract(AddOne(RHSCst), LHSCst);
@@ -4541,11 +4248,11 @@
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::createOr(Op0C->getOperand(0),
+ Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0),
Op1C->getOperand(0),
I.getName());
InsertNewInstBefore(NewOp, I);
- return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
}
@@ -4576,6 +4283,8 @@
return Changed ? &I : 0;
}
+namespace {
+
// XorSelf - Implements: X ^ X --> 0
struct XorSelf {
Value *RHS;
@@ -4586,6 +4295,7 @@
}
};
+}
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
@@ -4627,13 +4337,13 @@
if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
Instruction *NotY =
- BinaryOperator::createNot(Op0I->getOperand(1),
+ BinaryOperator::CreateNot(Op0I->getOperand(1),
Op0I->getOperand(1)->getName()+".not");
InsertNewInstBefore(NotY, I);
if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::createOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
else
- return BinaryOperator::createAnd(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
}
}
}
@@ -4652,6 +4362,25 @@
FCI->getOperand(0), FCI->getOperand(1));
}
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(),
+ Op0C->getDestTy())) {
+ Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
+ CI->getOpcode(), CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1)), I);
+ NewCI->takeName(CI);
+ return CastInst::Create(Opcode, NewCI, Op0C->getType());
+ }
+ }
+ }
+ }
+ }
+
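
The fold works because zext(i1) yields 0/1, so xor with 1 inverts the compare, and sext(i1) yields 0/-1, so xor with -1 does the same. A compile-time check of the zext form (editor's example):

#include <cstdint>
static_assert((uint32_t(5 < 3) ^ 1u) == uint32_t(!(5 < 3)),
              "xor(zext(cmp), 1) == zext(!cmp)");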
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ~(c-X) == X-c-1 == X+(-c-1)
if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
@@ -4659,7 +4388,7 @@
Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
ConstantInt::get(I.getType(), 1));
- return BinaryOperator::createAdd(Op0I->getOperand(1), ConstantRHS);
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
}
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
@@ -4667,14 +4396,14 @@
// ~(X-c) --> (-c-1)-X
if (RHS->isAllOnesValue()) {
Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
- return BinaryOperator::createSub(
+ return BinaryOperator::CreateSub(
ConstantExpr::getSub(NegOp0CI,
ConstantInt::get(I.getType(), 1)),
Op0I->getOperand(0));
} else if (RHS->getValue().isSignBit()) {
// (X + C) ^ signbit -> (X + C + signbit)
Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue());
- return BinaryOperator::createAdd(Op0I->getOperand(0), C);
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
} else if (Op0I->getOpcode() == Instruction::Or) {
@@ -4750,8 +4479,8 @@
std::swap(A, B);
if (B == Op1) { // (A|B)^B == A & ~B
Instruction *NotB =
- InsertNewInstBefore(BinaryOperator::createNot(Op1, "tmp"), I);
- return BinaryOperator::createAnd(A, NotB);
+ InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I);
+ return BinaryOperator::CreateAnd(A, NotB);
}
} else if (match(Op0I, m_Xor(m_Value(A), m_Value(B)))) {
if (Op1 == A) // (A^B)^A == B
@@ -4764,8 +4493,8 @@
if (B == Op1 && // (B&A)^A == ~B & A
!isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
Instruction *N =
- InsertNewInstBefore(BinaryOperator::createNot(A, "tmp"), I);
- return BinaryOperator::createAnd(N, Op1);
+ InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I);
+ return BinaryOperator::CreateAnd(N, Op1);
}
}
}
@@ -4776,10 +4505,10 @@
Op0I->getOperand(1) == Op1I->getOperand(1) &&
(Op0I->hasOneUse() || Op1I->hasOneUse())) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createXor(Op0I->getOperand(0),
+ InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0),
Op1I->getOperand(0),
Op0I->getName()), I);
- return BinaryOperator::create(Op1I->getOpcode(), NewOp,
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
Op1I->getOperand(1));
}
@@ -4789,13 +4518,13 @@
if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
// (A | B)^(A & B) -> A ^ B
if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
match(Op1I, m_And(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
// (A & B)^(C & D)
@@ -4815,8 +4544,8 @@
if (X) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createXor(Y, Z, Op0->getName()), I);
- return BinaryOperator::createAnd(NewOp, X);
+ InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I);
+ return BinaryOperator::CreateAnd(NewOp, X);
}
}
}
@@ -4837,14 +4566,15 @@
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::createXor(Op0C->getOperand(0),
+ Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0),
Op1C->getOperand(0),
I.getName());
InsertNewInstBefore(NewOp, I);
- return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
}
+
return Changed ? &I : 0;
}
@@ -4873,11 +4603,12 @@
Value *Result = Constant::getNullValue(IntPtrTy);
// Build a mask for high order bits.
- unsigned IntPtrWidth = TD.getPointerSize()*8;
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
- for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
- Value *Op = GEP->getOperand(i);
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()) & PtrSizeMask;
if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
if (OpC->isZero()) continue;
@@ -4890,7 +4621,7 @@
Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
else
Result = IC.InsertNewInstBefore(
- BinaryOperator::createAdd(Result,
+ BinaryOperator::CreateAdd(Result,
ConstantInt::get(IntPtrTy, Size),
GEP->getName()+".offs"), I);
continue;
@@ -4904,7 +4635,7 @@
else {
// Emit an add instruction.
Result = IC.InsertNewInstBefore(
- BinaryOperator::createAdd(Result, Scale,
+ BinaryOperator::CreateAdd(Result, Scale,
GEP->getName()+".offs"), I);
}
continue;
@@ -4922,7 +4653,7 @@
if (Constant *OpC = dyn_cast<Constant>(Op))
Op = ConstantExpr::getMul(OpC, Scale);
else // We'll let instcombine(mul) convert this to a shl if possible.
- Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale,
+ Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
GEP->getName()+".idx"), I);
}
@@ -4931,12 +4662,119 @@
Result = ConstantExpr::getAdd(cast<Constant>(Op),
cast<Constant>(Result));
else
- Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result,
+ Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result,
GEP->getName()+".offs"), I);
}
return Result;
}
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This latter form is less amenable to optimization though, and we are
+/// allowed to generate the first by knowing that pointer arithmetic doesn't
+/// overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+ InstCombiner &IC) {
+ TargetData &TD = IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getABITypeSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+    // Cast to IntPtrTy in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+ VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
+ VariableIdx->getNameStart(), &I);
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType();
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+ true /*SExt*/,
+ VariableIdx->getNameStart(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
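
A worked instance of the scaling argument above (editor's numbers): for &A[i] with i32 elements and a 12-byte constant prefix, the offset is 12 + 4*i, and since 12 is a multiple of the scale 4, it crosses zero exactly where 3 + i does:

#include <cstdint>

// Legal only because Offset is a multiple of Scale; an offset like
// "10 + 3*i" must fall back to the general EmitGEPOffset path.
bool gepOffsetIsZero(int64_t I) {
  const int64_t Offset = 12, Scale = 4;
  return Offset / Scale + I == 0;       // same truth value as Offset + Scale*I == 0
}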
+
+
/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
/// else. At this point we know that the GEP is on the LHS of the comparison.
Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
@@ -4944,15 +4782,20 @@
Instruction &I) {
assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
- if (CastInst *CI = dyn_cast<CastInst>(RHS))
- if (isa<PointerType>(CI->getOperand(0)->getType()))
- RHS = CI->getOperand(0);
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
Value *PtrBase = GEPLHS->getOperand(0);
if (PtrBase == RHS) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
- // This transformation is valid because we know pointers can't overflow.
- Value *Offset = EmitGEPOffset(GEPLHS, I, *this);
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow. See if we can output an optimized form.
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS, I, *this);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
Constant::getNullValue(Offset->getType()));
} else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
@@ -5022,7 +4865,7 @@
if (NumDifferences == 0) // SAME GEP?
return ReplaceInstUsesWith(I, // No comparison is needed here.
ConstantInt::get(Type::Int1Ty,
- isTrueWhenEqual(Cond)));
+ ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -5045,6 +4888,137 @@
return 0;
}
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+  // Check to see that the input is converted from an integer type that is
+  // small enough that it preserves all bits. TODO: check here for "known"
+  // sign bits. This would allow us to handle (fptosi (x >>s 62) to float)
+  // if x is i64, for example.
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ if (isa<UIToFPInst>(LHSI))
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+  // not a NaN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: assert(0 && "Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ: Pred = ICmpInst::ICMP_EQ; break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT: Pred = ICmpInst::ICMP_SGT; break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE: Pred = ICmpInst::ICMP_SGE; break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT: Pred = ICmpInst::ICMP_SLT; break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE: Pred = ICmpInst::ICMP_SLE; break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE: Pred = ICmpInst::ICMP_NE; break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ }
+
+ const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ }
+
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ }
+
+  // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] but
+  // it may still be fractional.  See if it is fractional by casting the FP
+  // value to an integer and back, then checking for equality.  Don't do this
+  // for zero, because -0.0 is not fractional.
+ Constant *RHSInt = ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero() &&
+ ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) != RHSC) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: assert(0 && "Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
bool Changed = SimplifyCompare(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -5093,10 +5067,31 @@
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+    // If the constant is a NaN, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ assert(FCmpInst::isUnordered(I.getPredicate()) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ }
+ }
+
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
case Instruction::PHI:
- if (Instruction *NV = FoldOpIntoPhi(I))
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
return NV;
break;
case Instruction::Select:
@@ -5139,7 +5134,7 @@
// icmp X, X
if (Op0 == Op1)
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
- isTrueWhenEqual(I)));
+ I.isTrueWhenEqual()));
if (isa<UndefValue>(Op1)) // X icmp undef -> undef
return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
@@ -5151,19 +5146,19 @@
(isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
isa<ConstantPointerNull>(Op1)))
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
- !isTrueWhenEqual(I)));
+ !I.isTrueWhenEqual()));
// icmp's with boolean values can always be turned into bitwise operations
if (Ty == Type::Int1Ty) {
switch (I.getPredicate()) {
default: assert(0 && "Invalid icmp instruction!");
case ICmpInst::ICMP_EQ: { // icmp eq bool %A, %B -> ~(A^B)
- Instruction *Xor = BinaryOperator::createXor(Op0, Op1, I.getName()+"tmp");
+ Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
InsertNewInstBefore(Xor, I);
- return BinaryOperator::createNot(Xor);
+ return BinaryOperator::CreateNot(Xor);
}
case ICmpInst::ICMP_NE: // icmp eq bool %A, %B -> A^B
- return BinaryOperator::createXor(Op0, Op1);
+ return BinaryOperator::CreateXor(Op0, Op1);
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT:
@@ -5171,9 +5166,9 @@
// FALL THROUGH
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT: { // icmp lt bool A, B -> ~X & Y
- Instruction *Not = BinaryOperator::createNot(Op0, I.getName()+"tmp");
+ Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
InsertNewInstBefore(Not, I);
- return BinaryOperator::createAnd(Not, Op1);
+ return BinaryOperator::CreateAnd(Not, Op1);
}
case ICmpInst::ICMP_UGE:
case ICmpInst::ICMP_SGE:
@@ -5181,9 +5176,9 @@
// FALL THROUGH
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_SLE: { // icmp le bool %A, %B -> ~A | B
- Instruction *Not = BinaryOperator::createNot(Op0, I.getName()+"tmp");
+ Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
InsertNewInstBefore(Not, I);
- return BinaryOperator::createOr(Not, Op1);
+ return BinaryOperator::CreateOr(Not, Op1);
}
}
}
@@ -5396,8 +5391,12 @@
break;
case Instruction::PHI:
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+      // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
break;
case Instruction::Select: {
// If either operand of the select is a constant, we can fold the
@@ -5432,7 +5431,7 @@
if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
AddToWorkList(LHSI);
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
- !isTrueWhenEqual(I)));
+ !I.isTrueWhenEqual()));
}
break;
}
@@ -5487,8 +5486,21 @@
return R;
}
+ // ~x < ~y --> y < x
+ { Value *A, *B;
+ if (match(Op0, m_Not(m_Value(A))) &&
+ match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ }
+
if (I.isEquality()) {
Value *A, *B, *C, *D;
+
+ // -x == -y --> x == y
+ if (match(Op0, m_Neg(m_Value(A))) &&
+ match(Op1, m_Neg(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), A, B);
+
if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
Value *OtherVal = A == Op1 ? B : A;
@@ -5502,7 +5514,7 @@
if (ConstantInt *C2 = dyn_cast<ConstantInt>(D))
if (Op1->hasOneUse()) {
Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue());
- Instruction *Xor = BinaryOperator::createXor(C, NC, "tmp");
+ Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
return new ICmpInst(I.getPredicate(), A,
InsertNewInstBefore(Xor, I));
}
@@ -5550,8 +5562,8 @@
}
if (X) { // Build (X^Y) & Z
- Op1 = InsertNewInstBefore(BinaryOperator::createXor(X, Y, "tmp"), I);
- Op1 = InsertNewInstBefore(BinaryOperator::createAnd(Op1, Z, "tmp"), I);
+ Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
+ Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
I.setOperand(0, Op1);
I.setOperand(1, Constant::getNullValue(Op1->getType()));
return &I;
@@ -5765,7 +5777,7 @@
APInt NewCI = RHSV;
NewCI.zext(BitWidth);
Instruction *NewAnd =
- BinaryOperator::createAnd(Cast->getOperand(0),
+ BinaryOperator::CreateAnd(Cast->getOperand(0),
ConstantInt::get(NewCST),LHSI->getName());
InsertNewInstBefore(NewAnd, ICI);
return new ICmpInst(ICI.getPredicate(), NewAnd,
@@ -5845,18 +5857,18 @@
// Compute C << Y.
Value *NS;
if (Shift->getOpcode() == Instruction::LShr) {
- NS = BinaryOperator::createShl(AndCST,
+ NS = BinaryOperator::CreateShl(AndCST,
Shift->getOperand(1), "tmp");
} else {
// Insert a logical shift.
- NS = BinaryOperator::createLShr(AndCST,
+ NS = BinaryOperator::CreateLShr(AndCST,
Shift->getOperand(1), "tmp");
}
InsertNewInstBefore(cast<Instruction>(NS), ICI);
// Compute X & (C << Y).
Instruction *NewAnd =
- BinaryOperator::createAnd(Shift->getOperand(0), NS, LHSI->getName());
+ BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
InsertNewInstBefore(NewAnd, ICI);
ICI.setOperand(0, NewAnd);
@@ -5895,7 +5907,7 @@
ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
Instruction *AndI =
- BinaryOperator::createAnd(LHSI->getOperand(0),
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
Mask, LHSI->getName()+".mask");
Value *And = InsertNewInstBefore(AndI, ICI);
return new ICmpInst(ICI.getPredicate(), And,
@@ -5911,7 +5923,7 @@
Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
(TypeBits-ShAmt->getZExtValue()-1));
Instruction *AndI =
- BinaryOperator::createAnd(LHSI->getOperand(0),
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
Mask, LHSI->getName()+".mask");
Value *And = InsertNewInstBefore(AndI, ICI);
@@ -5953,19 +5965,20 @@
// Otherwise, check to see if the bits shifted out are known to be zero.
// If so, we can compare against the unshifted value:
// (X & 4) >> 1 == 2 --> (X & 4) == 4.
- if (MaskedValueIsZero(LHSI->getOperand(0),
+ if (LHSI->hasOneUse() &&
+ MaskedValueIsZero(LHSI->getOperand(0),
APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantExpr::getShl(RHS, ShAmt));
}
- if (LHSI->hasOneUse() || RHSV == 0) {
+ if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
Constant *Mask = ConstantInt::get(Val);
Instruction *AndI =
- BinaryOperator::createAnd(LHSI->getOperand(0),
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
Mask, LHSI->getName()+".mask");
Value *And = InsertNewInstBefore(AndI, ICI);
return new ICmpInst(ICI.getPredicate(), And,
@@ -6034,7 +6047,7 @@
const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
Instruction *NewRem =
- BinaryOperator::createURem(BO->getOperand(0), BO->getOperand(1),
+ BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1),
BO->getName());
InsertNewInstBefore(NewRem, ICI);
return new ICmpInst(ICI.getPredicate(), NewRem,
@@ -6058,7 +6071,7 @@
else if (Value *NegVal = dyn_castNegVal(BOp0))
return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
else if (BO->hasOneUse()) {
- Instruction *Neg = BinaryOperator::createNeg(BOp1);
+ Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
InsertNewInstBefore(Neg, ICI);
Neg->takeName(BO);
return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
@@ -6106,7 +6119,7 @@
Constant::getNullValue(RHS->getType()));
// Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (isSignBit(BOC)) {
+ if (BOC->getValue().isSignBit()) {
Value *X = BO->getOperand(0);
Constant *Zero = Constant::getNullValue(X->getType());
ICmpInst::Predicate pred = isICMP_NE ?
@@ -6295,7 +6308,7 @@
if (Constant *CI = dyn_cast<Constant>(Result))
return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
else
- return BinaryOperator::createNot(Result);
+ return BinaryOperator::CreateNot(Result);
}
}
@@ -6321,7 +6334,7 @@
// See if we can turn a signed shr into an unsigned shr.
if (MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
- return BinaryOperator::createLShr(Op0, I.getOperand(1));
+ return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
return 0;
}
@@ -6389,7 +6402,7 @@
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
if (BO->getOpcode() == Instruction::Mul && isLeftShift)
if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
- return BinaryOperator::createMul(BO->getOperand(0),
+ return BinaryOperator::CreateMul(BO->getOperand(0),
ConstantExpr::getShl(BOOp, Op1));
// Try to fold constant and into select arguments.
@@ -6412,7 +6425,7 @@
isa<ConstantInt>(TrOp->getOperand(1))) {
// Okay, we'll do this xform. Make the shift of shift.
Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
- Instruction *NSh = BinaryOperator::create(I.getOpcode(), TrOp, ShAmt,
+ Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
I.getName());
InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
@@ -6435,7 +6448,7 @@
MaskV = MaskV.lshr(Op1->getZExtValue());
}
- Instruction *And = BinaryOperator::createAnd(NSh, ConstantInt::get(MaskV),
+ Instruction *And = BinaryOperator::CreateAnd(NSh, ConstantInt::get(MaskV),
TI->getName());
InsertNewInstBefore(And, I); // shift1 & 0x00FF
@@ -6460,16 +6473,16 @@
if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
match(Op0BO->getOperand(1),
m_Shr(m_Value(V1), m_ConstantInt(CC))) && CC == Op1) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(0), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *X =
- BinaryOperator::create(Op0BO->getOpcode(), YS, V1,
+ BinaryOperator::Create(Op0BO->getOpcode(), YS, V1,
Op0BO->getOperand(1)->getName());
InsertNewInstBefore(X, I); // (X + (Y << C))
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::createAnd(X, ConstantInt::get(
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
@@ -6480,16 +6493,16 @@
m_And(m_Shr(m_Value(V1), m_Value(V2)),m_ConstantInt(CC))) &&
cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse() &&
V2 == Op1) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(0), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *XM =
- BinaryOperator::createAnd(V1, ConstantExpr::getShl(CC, Op1),
+ BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
V1->getName()+".mask");
InsertNewInstBefore(XM, I); // X & (CC << C)
- return BinaryOperator::create(Op0BO->getOpcode(), YS, XM);
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
}
@@ -6499,16 +6512,16 @@
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
match(Op0BO->getOperand(0),
m_Shr(m_Value(V1), m_ConstantInt(CC))) && CC == Op1) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(1), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *X =
- BinaryOperator::create(Op0BO->getOpcode(), V1, YS,
+ BinaryOperator::Create(Op0BO->getOpcode(), V1, YS,
Op0BO->getOperand(0)->getName());
InsertNewInstBefore(X, I); // (X + (Y << C))
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::createAnd(X, ConstantInt::get(
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
@@ -6519,16 +6532,16 @@
m_ConstantInt(CC))) && V2 == Op1 &&
cast<BinaryOperator>(Op0BO->getOperand(0))
->getOperand(0)->hasOneUse()) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(1), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *XM =
- BinaryOperator::createAnd(V1, ConstantExpr::getShl(CC, Op1),
+ BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
V1->getName()+".mask");
InsertNewInstBefore(XM, I); // X & (CC << C)
- return BinaryOperator::create(Op0BO->getOpcode(), XM, YS);
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
}
break;
@@ -6569,11 +6582,11 @@
Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
Instruction *NewShift =
- BinaryOperator::create(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
InsertNewInstBefore(NewShift, I);
NewShift->takeName(Op0BO);
- return BinaryOperator::create(Op0BO->getOpcode(), NewShift,
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
NewRHS);
}
}
@@ -6601,21 +6614,21 @@
// Check for (X << c1) << c2 and (X >> c1) >> c2
if (I.getOpcode() == ShiftOp->getOpcode()) {
- return BinaryOperator::create(I.getOpcode(), X,
+ return BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, AmtSum));
} else if (ShiftOp->getOpcode() == Instruction::LShr &&
I.getOpcode() == Instruction::AShr) {
// ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
- return BinaryOperator::createLShr(X, ConstantInt::get(Ty, AmtSum));
+ return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
} else if (ShiftOp->getOpcode() == Instruction::AShr &&
I.getOpcode() == Instruction::LShr) {
// ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
Instruction *Shift =
- BinaryOperator::createAShr(X, ConstantInt::get(Ty, AmtSum));
+ BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// Okay, if we get here, one shift must be left, and the other shift must be
@@ -6624,12 +6637,12 @@
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
if (I.getOpcode() == Instruction::Shl) {
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::createAnd(X, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
}
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
if (I.getOpcode() == Instruction::LShr) {
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::createAnd(X, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
}
// We can simplify ((X << C) >>s C) into a trunc + sext.
// NOTE: we could do this for any C, but that would make 'unusual' integer
@@ -6661,22 +6674,22 @@
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Instruction *Shift =
- BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff));
+ BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Instruction *Shift =
- BinaryOperator::createLShr(X, ConstantInt::get(Ty, ShiftDiff));
+ BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
@@ -6689,23 +6702,23 @@
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Instruction *Shift =
- BinaryOperator::create(ShiftOp->getOpcode(), X,
+ BinaryOperator::Create(ShiftOp->getOpcode(), X,
ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Instruction *Shift =
- BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff));
+ BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
@@ -6820,14 +6833,14 @@
Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
// otherwise multiply the amount and the number of elements
else if (Scale != 1) {
- Instruction *Tmp = BinaryOperator::createMul(Amt, NumElements, "tmp");
+ Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
Amt = InsertNewInstBefore(Tmp, AI);
}
}
if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true);
- Instruction *Tmp = BinaryOperator::createAdd(Amt, Off, "tmp");
+ Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
Amt = InsertNewInstBefore(Tmp, AI);
}
@@ -6861,6 +6874,16 @@
///
/// This is a truncation operation if Ty is smaller than V->getType(), or an
/// extension operation if Ty is larger.
+///
+/// If CastOpc is a truncation, then Ty will be a type smaller than V's.  We
+/// should return true if trunc(V) can be computed by computing V in the smaller
+/// type. If V is an instruction, then trunc(inst(x,y)) can be computed as
+/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
+/// efficiently truncated.
+///
+/// If CastOpc is a sext or zext, we are asking whether the low bits of the
+/// value can be computed in a larger type, which is then and'd or
+/// sext_in_reg'd to get
+/// the final result.
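+///
+/// For example, trunc (add i32 %x, %y) to i16 can be computed as
+/// add i16 (trunc %x), (trunc %y) when the truncated operands are cheap to
+/// compute.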
bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
unsigned CastOpc,
int &NumCastsRemoved) {
@@ -6881,7 +6904,7 @@
// If the first operand is itself a cast, and is eliminable, do not count
// this as an eliminable cast. We would prefer to eliminate those two
// casts first.
- if (!isa<CastInst>(I->getOperand(0)))
+ if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
++NumCastsRemoved;
return true;
}
@@ -6894,6 +6917,7 @@
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
+ case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
@@ -6903,14 +6927,6 @@
CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
NumCastsRemoved);
- case Instruction::Mul:
- // A multiply can be truncated by truncating its operands.
- return Ty->getBitWidth() < OrigTy->getBitWidth() &&
- CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
- NumCastsRemoved) &&
- CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
- NumCastsRemoved);
-
case Instruction::Shl:
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
@@ -6946,8 +6962,23 @@
// of casts in the input.
if (I->getOpcode() == CastOpc)
return true;
-
break;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc,
+ NumCastsRemoved) &&
+ CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc,
+ NumCastsRemoved);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
+ NumCastsRemoved))
+ return false;
+ return true;
+ }
default:
// TODO: Can handle more cases here.
break;
@@ -6979,8 +7010,8 @@
case Instruction::Shl: {
Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
- Res = BinaryOperator::create((Instruction::BinaryOps)I->getOpcode(),
- LHS, RHS, I->getName());
+ Res = BinaryOperator::Create((Instruction::BinaryOps)I->getOpcode(),
+ LHS, RHS);
break;
}
case Instruction::Trunc:
@@ -6992,16 +7023,33 @@
if (I->getOperand(0)->getType() == Ty)
return I->getOperand(0);
- // Otherwise, must be the same type of case, so just reinsert a new one.
- Res = CastInst::create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
- Ty, I->getName());
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+ Ty);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty);
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
break;
+ }
default:
// TODO: Can handle more cases here.
assert(0 && "Unreachable!");
break;
}
+ Res->takeName(I);
return InsertNewInstBefore(Res, *I);
}
@@ -7016,7 +7064,7 @@
isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
- return CastInst::create(opc, CSrc->getOperand(0), CI.getType());
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
}
}
@@ -7211,11 +7259,11 @@
assert(SrcBitSize < DestBitSize && "Not a zext?");
Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize,
SrcBitSize));
- return BinaryOperator::createAnd(Res, C);
+ return BinaryOperator::CreateAnd(Res, C);
}
case Instruction::SExt:
// We need to emit a cast to truncate, then a cast to sext.
- return CastInst::create(Instruction::SExt,
+ return CastInst::Create(Instruction::SExt,
InsertCastBefore(Instruction::Trunc, Res, Src->getType(),
CI), DestTy);
}
@@ -7242,7 +7290,7 @@
Instruction::CastOps opcode = CI.getOpcode();
Value *Op0c = InsertOperandCastBefore(opcode, Op0, DestTy, SrcI);
Value *Op1c = InsertOperandCastBefore(opcode, Op1, DestTy, SrcI);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
}
}
@@ -7253,7 +7301,7 @@
Op1 == ConstantInt::getTrue() &&
(!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
Value *New = InsertOperandCastBefore(Instruction::ZExt, Op0, DestTy, &CI);
- return BinaryOperator::createXor(New, ConstantInt::get(CI.getType(), 1));
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
break;
case Instruction::SDiv:
@@ -7271,7 +7319,7 @@
Op0, DestTy, SrcI);
Value *Op1c = InsertOperandCastBefore(Instruction::BitCast,
Op1, DestTy, SrcI);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
}
}
@@ -7289,7 +7337,7 @@
Instruction::BitCast : Instruction::Trunc);
Value *Op0c = InsertOperandCastBefore(opcode, Op0, DestTy, SrcI);
Value *Op1c = InsertOperandCastBefore(opcode, Op1, DestTy, SrcI);
- return BinaryOperator::createShl(Op0c, Op1c);
+ return BinaryOperator::CreateShl(Op0c, Op1c);
}
break;
case Instruction::AShr:
@@ -7301,7 +7349,7 @@
uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize);
if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) {
// Insert the new logical shift right.
- return BinaryOperator::createLShr(Op0, Op1);
+ return BinaryOperator::CreateLShr(Op0, Op1);
}
}
break;
@@ -7339,7 +7387,7 @@
Value *V1 = InsertCastBefore(Instruction::Trunc, SrcIOp0, Ty, CI);
Value *V2 = InsertCastBefore(Instruction::Trunc, SrcI->getOperand(1),
Ty, CI);
- return BinaryOperator::createLShr(V1, V2);
+ return BinaryOperator::CreateLShr(V1, V2);
}
} else { // This is a variable shr.
@@ -7350,9 +7398,9 @@
Value *One = ConstantInt::get(SrcI->getType(), 1);
Value *V = InsertNewInstBefore(
- BinaryOperator::createShl(One, SrcI->getOperand(1),
+ BinaryOperator::CreateShl(One, SrcI->getOperand(1),
"tmp"), CI);
- V = InsertNewInstBefore(BinaryOperator::createAnd(V,
+ V = InsertNewInstBefore(BinaryOperator::CreateAnd(V,
SrcI->getOperand(0),
"tmp"), CI);
Value *Zero = Constant::getNullValue(V->getType());
@@ -7385,16 +7433,16 @@
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getPrimitiveSizeInBits()-1);
- In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh,
+ In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
In->getName()+".lobit"),
CI);
if (In->getType() != CI.getType())
- In = CastInst::createIntegerCast(In, CI.getType(),
+ In = CastInst::CreateIntegerCast(In, CI.getType(),
false/*ZExt*/, "tmp", &CI);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
- In = InsertNewInstBefore(BinaryOperator::createXor(In, One,
+ In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One,
In->getName()+".not"),
CI);
}
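+      // The transform above turns e.g. zext (icmp slt i32 %x, 0) to i32 into
+      // lshr i32 %x, 31; the sgt -1 form additionally xors the low bit.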
@@ -7439,21 +7487,21 @@
if (ShiftAmt) {
// Perform a logical shr by shiftamt.
// Insert the shift to put the result in the low bit.
- In = InsertNewInstBefore(BinaryOperator::createLShr(In,
+ In = InsertNewInstBefore(BinaryOperator::CreateLShr(In,
ConstantInt::get(In->getType(), ShiftAmt),
In->getName()+".lobit"), CI);
}
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
- In = BinaryOperator::createXor(In, One, "tmp");
+ In = BinaryOperator::CreateXor(In, One, "tmp");
InsertNewInstBefore(cast<Instruction>(In), CI);
}
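+        // For example, zext (icmp ne (and %x, 4), 0) becomes
+        // lshr (and %x, 4), 2, while the eq form also toggles the low bit.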
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
else
- return CastInst::createIntegerCast(In, CI.getType(), false/*ZExt*/);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
}
}
}
@@ -7485,12 +7533,12 @@
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
Constant *AndConst = ConstantInt::get(AndValue);
Instruction *And =
- BinaryOperator::createAnd(CSrc->getOperand(0), AndConst);
+ BinaryOperator::CreateAnd(CSrc->getOperand(0), AndConst);
// Unfortunately, if the type changed, we need to cast it back.
if (And->getType() != CI.getType()) {
And->setName(CSrc->getName()+".mask");
InsertNewInstBefore(And, CI);
- And = CastInst::createIntegerCast(And, CI.getType(), false/*ZExt*/);
+ And = CastInst::CreateIntegerCast(And, CI.getType(), false/*ZExt*/);
}
return And;
}
@@ -7511,7 +7559,7 @@
transformZExtICmp(RHS, CI, false))) {
Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI);
Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI);
- return BinaryOperator::create(Instruction::Or, LCast, RCast);
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
}
}
@@ -7540,21 +7588,48 @@
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getPrimitiveSizeInBits()-1);
- In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh,
+ In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
In->getName()+".lobit"),
CI);
if (In->getType() != CI.getType())
- In = CastInst::createIntegerCast(In, CI.getType(),
+ In = CastInst::CreateIntegerCast(In, CI.getType(),
true/*SExt*/, "tmp", &CI);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT)
- In = InsertNewInstBefore(BinaryOperator::createNot(In,
+ In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
In->getName()+".not"), CI);
return ReplaceInstUsesWith(CI, In);
}
}
}
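+  // The transform above turns e.g. sext (icmp slt i32 %x, 0) to i32 into
+  // ashr i32 %x, 31: all ones when %x is negative, zero otherwise.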
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ if (getOpcode(Src) == Instruction::Trunc) {
+ Value *Op = cast<User>(Src)->getOperand(0);
+ unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth();
+ unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth();
+ unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+ unsigned NumSignBits = ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
+      // bits, it is already sign extended and can be used directly.
+ if (NumSignBits > DestBits-MidBits)
+ return ReplaceInstUsesWith(CI, Op);
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return new SExtInst(Op, CI.getType(), "tmp");
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return new TruncInst(Op, CI.getType(), "tmp");
+ }
+ }
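+  // For example, with at least 25 sign bits in an i32 %x,
+  //   sext (trunc i32 %x to i8) to i32  -->  %x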
return 0;
}
@@ -7625,7 +7700,7 @@
CI.getType(), CI);
RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
CI.getType(), CI);
- return BinaryOperator::create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
}
}
break;
@@ -7638,12 +7713,30 @@
return commonCastTransforms(CI);
}
-Instruction *InstCombiner::visitFPToUI(CastInst &CI) {
- return commonCastTransforms(CI);
-}
-
-Instruction *InstCombiner::visitFPToSI(CastInst &CI) {
- return commonCastTransforms(CI);
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ // fptoui(uitofp(X)) --> X if the intermediate type has enough bits in its
+ // mantissa to accurately represent all values of X. For example, do not
+ // do this with i64->float->i64.
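+  // For example, fptoui (uitofp i16 %x to float) folds to %x, since every
+  // i16 value round-trips exactly through float's 24-bit mantissa.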
+ if (UIToFPInst *SrcI = dyn_cast<UIToFPInst>(FI.getOperand(0)))
+ if (SrcI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ SrcI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ // fptosi(sitofp(X)) --> X if the intermediate type has enough bits in its
+ // mantissa to accurately represent all values of X. For example, do not
+ // do this with i64->float->i64.
+ if (SIToFPInst *SrcI = dyn_cast<SIToFPInst>(FI.getOperand(0)))
+ if (SrcI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() <=
+ SrcI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+
+ return commonCastTransforms(FI);
}
Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
@@ -7868,7 +7961,7 @@
SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
FI->getOperand(0), SI.getName()+".v");
InsertNewInstBefore(NewSI, SI);
- return CastInst::create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
@@ -7912,9 +8005,9 @@
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
if (MatchIsOpZero)
- return BinaryOperator::create(BO->getOpcode(), MatchOp, NewSI);
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
else
- return BinaryOperator::create(BO->getOpcode(), NewSI, MatchOp);
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
}
assert(0 && "Shouldn't get here");
return 0;
@@ -7949,33 +8042,33 @@
if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
if (C->getZExtValue()) {
// Change: A = select B, true, C --> A = or B, C
- return BinaryOperator::createOr(CondVal, FalseVal);
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
} else {
// Change: A = select B, false, C --> A = and !B, C
Value *NotCond =
- InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
"not."+CondVal->getName()), SI);
- return BinaryOperator::createAnd(NotCond, FalseVal);
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
} else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
if (C->getZExtValue() == false) {
// Change: A = select B, C, false --> A = and B, C
- return BinaryOperator::createAnd(CondVal, TrueVal);
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
} else {
// Change: A = select B, C, true --> A = or !B, C
Value *NotCond =
- InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
"not."+CondVal->getName()), SI);
- return BinaryOperator::createOr(NotCond, TrueVal);
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
}
}
// select a, b, a -> a&b
// select a, a, b -> a|b
if (CondVal == TrueVal)
- return BinaryOperator::createOr(CondVal, FalseVal);
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
else if (CondVal == FalseVal)
- return BinaryOperator::createAnd(CondVal, TrueVal);
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
// Selecting between two integer constants?
@@ -7983,13 +8076,13 @@
if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
// select C, 1, 0 -> zext C to int
if (FalseValC->isZero() && TrueValC->getValue() == 1) {
- return CastInst::create(Instruction::ZExt, CondVal, SI.getType());
+ return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
} else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
// select C, 0, 1 -> zext !C to int
Value *NotCond =
- InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
"not."+CondVal->getName()), SI);
- return CastInst::create(Instruction::ZExt, NotCond, SI.getType());
+ return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
}
// FIXME: Turn select 0/-1 and -1/0 into sext from condition!
@@ -8005,7 +8098,7 @@
Value *X = IC->getOperand(0);
uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
- Instruction *SRA = BinaryOperator::create(Instruction::AShr, X,
+ Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
ShAmt, "ones");
InsertNewInstBefore(SRA, SI);
@@ -8018,7 +8111,7 @@
opc = Instruction::SExt;
else if (SRASize > SISize)
opc = Instruction::Trunc;
- return CastInst::create(opc, SRA, SI.getType());
+ return CastInst::Create(opc, SRA, SI.getType());
}
}
@@ -8043,7 +8136,7 @@
ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
Value *V = ICA;
if (ShouldNotVal)
- V = InsertNewInstBefore(BinaryOperator::create(
+ V = InsertNewInstBefore(BinaryOperator::Create(
Instruction::Xor, V, ICA->getOperand(1)), SI);
return ReplaceInstUsesWith(SI, V);
}
@@ -8148,7 +8241,7 @@
NegVal = ConstantExpr::getNeg(C);
} else {
NegVal = InsertNewInstBefore(
- BinaryOperator::createNeg(SubOp->getOperand(1), "tmp"), SI);
+ BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI);
}
Value *NewTrueOp = OtherAddOp;
@@ -8156,10 +8249,11 @@
if (AddOp != TI)
std::swap(NewTrueOp, NewFalseOp);
Instruction *NewSel =
- SelectInst::Create(CondVal, NewTrueOp,NewFalseOp,SI.getName()+".p");
+ SelectInst::Create(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
NewSel = InsertNewInstBefore(NewSel, SI);
- return BinaryOperator::createAdd(SubOp->getOperand(0), NewSel);
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
}
}
}
@@ -8182,11 +8276,12 @@
if (OpToFold) {
Constant *C = GetSelectFoldableConstant(TVI);
Instruction *NewSel =
- SelectInst::Create(SI.getCondition(), TVI->getOperand(2-OpToFold), C);
+ SelectInst::Create(SI.getCondition(),
+ TVI->getOperand(2-OpToFold), C);
InsertNewInstBefore(NewSel, SI);
NewSel->takeName(TVI);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
- return BinaryOperator::create(BO->getOpcode(), FalseVal, NewSel);
+ return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
else {
assert(0 && "Unknown instruction!!");
}
@@ -8207,11 +8302,12 @@
if (OpToFold) {
Constant *C = GetSelectFoldableConstant(FVI);
Instruction *NewSel =
- SelectInst::Create(SI.getCondition(), C, FVI->getOperand(2-OpToFold));
+ SelectInst::Create(SI.getCondition(), C,
+ FVI->getOperand(2-OpToFold));
InsertNewInstBefore(NewSel, SI);
NewSel->takeName(FVI);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
- return BinaryOperator::create(BO->getOpcode(), TrueVal, NewSel);
+ return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
else
assert(0 && "Unknown instruction!!");
}
@@ -8247,9 +8343,9 @@
case Instruction::GetElementPtr: {
// If all indexes are zero, it is just the alignment of the base pointer.
bool AllZeroOperands = true;
- for (unsigned i = 1, e = U->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(U->getOperand(i)) ||
- !cast<Constant>(U->getOperand(i))->isNullValue()) {
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
AllZeroOperands = false;
break;
}
@@ -8323,7 +8419,9 @@
// A single load+store correctly handles overlapping memory in the memmove
// case.
unsigned Size = MemOpLength->getZExtValue();
- if (Size == 0 || Size > 8 || (Size&(Size-1)))
+ if (Size == 0) return MI; // Delete this mem transfer.
+
+ if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
// Use an integer load+store unless we can find something better.
@@ -8340,7 +8438,7 @@
if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
- while (!SrcETy->isFirstClassType()) {
+ while (!SrcETy->isSingleValueType()) {
if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
if (STy->getNumElements() == 1)
SrcETy = STy->getElementType(0);
@@ -8355,7 +8453,7 @@
break;
}
- if (SrcETy->isFirstClassType())
+ if (SrcETy->isSingleValueType())
NewPtrTy = PointerType::getUnqual(SrcETy);
}
}
@@ -8377,6 +8475,48 @@
return MI;
}
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+ if (MI->getAlignment()->getZExtValue() < Alignment) {
+ MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || FillC->getType() != Type::Int8Ty)
+ return 0;
+ uint64_t Len = LenC->getZExtValue();
+ Alignment = MI->getAlignment()->getZExtValue();
+
+ // If the length is zero, this is a no-op
+ if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
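+  // For example, memset(p, 0xAB, 4, align) becomes a single i32 store of
+  // 0xABABABAB; the fill byte is replicated by the multiply below.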
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ const Type *ITy = IntegerType::get(Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI);
+
+    // For memset, alignment 0 is the same as alignment 1; not so for store.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false,
+ Alignment), *MI);
+
+    // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return 0;
+}
+
+
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
@@ -8417,6 +8557,10 @@
CI.setOperand(0, Intrinsic::getDeclaration(M, MemCpyID));
Changed = true;
}
+
+ // memmove(x,x,size) -> noop.
+ if (MMI->getSource() == MMI->getDest())
+ return EraseInstFromFunction(CI);
}
// If we can determine a pointer alignment that is bigger than currently
@@ -8424,152 +8568,161 @@
if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
if (Instruction *I = SimplifyMemTransfer(MI))
return I;
- } else if (isa<MemSetInst>(MI)) {
- unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
- if (MI->getAlignment()->getZExtValue() < Alignment) {
- MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
- Changed = true;
- }
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
}
if (Changed) return II;
- } else {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::ppc_altivec_lvx:
- case Intrinsic::ppc_altivec_lvxl:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse2_loadu_dq:
- // Turn PPC lvx -> load if the pointer is known aligned.
- // Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- Value *Ptr = InsertBitCastBefore(II->getOperand(1),
- PointerType::getUnqual(II->getType()),
- CI);
- return new LoadInst(Ptr);
- }
- break;
- case Intrinsic::ppc_altivec_stvx:
- case Intrinsic::ppc_altivec_stvxl:
- // Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(1)->getType());
- Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
- return new StoreInst(II->getOperand(1), Ptr);
- }
- break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- // Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(2)->getType());
- Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
- return new StoreInst(II->getOperand(2), Ptr);
- }
- break;
-
- case Intrinsic::x86_sse_cvttss2si: {
- // These intrinsics only demands the 0th element of its input vector. If
- // we can simplify the input based on that, do so now.
- uint64_t UndefElts;
- if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1,
- UndefElts)) {
- II->setOperand(1, V);
- return II;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1),
+ PointerType::getUnqual(II->getType()),
+ CI);
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(1)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(1), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(2)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvttss2si: {
+    // These intrinsics only demand the 0th element of their input vector.  If
+    // we can simplify the input based on that, do so now.
+ uint64_t UndefElts;
+ if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1,
+ UndefElts)) {
+ II->setOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
}
- break;
- }
- case Intrinsic::ppc_altivec_vperm:
- // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
- assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
+ Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
- // Check that all of the elements are integer constants or undefs.
- bool AllEltsOk = true;
for (unsigned i = 0; i != 16; ++i) {
- if (!isa<ConstantInt>(Mask->getOperand(i)) &&
- !isa<UndefValue>(Mask->getOperand(i))) {
- AllEltsOk = false;
- break;
- }
- }
-
- if (AllEltsOk) {
- // Cast the input vectors to byte vectors.
- Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
- Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
- Value *Result = UndefValue::get(Op0->getType());
-
- // Only extract each element once.
- Value *ExtractedElts[32];
- memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
- for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getOperand(i)))
- continue;
- unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
- Idx &= 31; // Match the hardware behavior.
-
- if (ExtractedElts[Idx] == 0) {
- Instruction *Elt =
- new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
- InsertNewInstBefore(Elt, CI);
- ExtractedElts[Idx] = Elt;
- }
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
- // Insert this value into the result vector.
- Result = InsertElementInst::Create(Result, ExtractedElts[Idx], i, "tmp");
- InsertNewInstBefore(cast<Instruction>(Result), CI);
+ if (ExtractedElts[Idx] == 0) {
+ Instruction *Elt =
+ new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
+ InsertNewInstBefore(Elt, CI);
+ ExtractedElts[Idx] = Elt;
}
- return CastInst::create(Instruction::BitCast, Result, CI.getType());
+
+ // Insert this value into the result vector.
+ Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
+ i, "tmp");
+ InsertNewInstBefore(cast<Instruction>(Result), CI);
}
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
- break;
+ }
+ break;
- case Intrinsic::stackrestore: {
- // If the save is right next to the restore, remove the restore. This can
- // happen when variable allocas are DCE'd.
- if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
- if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- BasicBlock::iterator BI = SS;
- if (&*++BI == II)
- return EraseInstFromFunction(CI);
- }
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
}
-
- // Scan down this block to see if there is another stack restore in the
- // same block without an intervening call/alloca.
- BasicBlock::iterator BI = II;
- TerminatorInst *TI = II->getParent()->getTerminator();
- bool CannotRemove = false;
- for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI)) {
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
CannotRemove = true;
break;
}
- if (isa<CallInst>(BI)) {
- if (!isa<IntrinsicInst>(BI)) {
- CannotRemove = true;
- break;
- }
- // If there is a stackrestore below this one, remove this one.
- return EraseInstFromFunction(CI);
- }
}
-
- // If the stack restore is in a return/unwind block and if there are no
- // allocas or calls between the restore and the return, nuke the restore.
- if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
- return EraseInstFromFunction(CI);
- break;
- }
}
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
}
return visitCallSite(II);
@@ -8581,6 +8734,31 @@
return visitCallSite(&II);
}
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.paramHasAttr(ix, ParamAttr::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (TD->getABITypeSize(SrcTy) != TD->getABITypeSize(DstTy))
+ return false;
+ return true;
+}
+
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -8635,19 +8813,17 @@
const PointerType *PTy = cast<PointerType>(Callee->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
- E = CS.arg_end(); I != E; ++I)
- if (CastInst *CI = dyn_cast<CastInst>(*I)) {
- // If this cast does not effect the value passed through the varargs
- // area, we can eliminate the use of the cast.
- Value *Op = CI->getOperand(0);
- if (CI->isLosslessCast()) {
- *I = Op;
- Changed = true;
- }
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
}
+ }
}
if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
@@ -8678,27 +8854,28 @@
//
const FunctionType *FT = Callee->getFunctionType();
const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
- if (isa<StructType>(FT->getReturnType()))
+ if (isa<StructType>(NewRetTy))
return false; // TODO: Handle multiple return values.
// Check to see if we are changing the return type...
- if (OldRetTy != FT->getReturnType()) {
- if (Callee->isDeclaration() && !Caller->use_empty() &&
- // Conversion is ok if changing from pointer to int of same size.
- !(isa<PointerType>(FT->getReturnType()) &&
- TD->getIntPtrType() == OldRetTy))
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
return false; // Cannot transform this return value.
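+    // For example, on a 32-bit target this allows rewriting a call typed to
+    // return i32 into one that returns i8*, or vice versa.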
if (!Caller->use_empty() &&
// void -> non-void is handled specially
- FT->getReturnType() != Type::VoidTy &&
- !CastInst::isCastable(FT->getReturnType(), OldRetTy))
+ NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
- if (RAttrs & ParamAttr::typeIncompatible(FT->getReturnType()))
+ if (RAttrs & ParamAttr::typeIncompatible(NewRetTy))
return false; // Attribute not compatible with transformed value.
}
@@ -8730,15 +8907,11 @@
if (CallerPAL.getParamAttrs(i + 1) & ParamAttr::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
- ConstantInt *c = dyn_cast<ConstantInt>(*AI);
- // Some conversions are safe even if we do not have a body.
- // Either we can cast directly, or we can upconvert the argument
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
bool isConvertible = ActTy == ParamTy ||
- (isa<PointerType>(ParamTy) && isa<PointerType>(ActTy)) ||
- (ParamTy->isInteger() && ActTy->isInteger() &&
- ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()) ||
- (c && ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()
- && c->getValue().isStrictlyPositive());
+ ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
if (Callee->isDeclaration() && !isConvertible) return false;
}
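
As a rough sketch of the relaxed rule (hypothetical IR, assuming a 32-bit
target where i32 is the pointer-sized integer type):

    declare i8* @f(i8*)

    define i32 @caller(i32 %x) {
    entry:
      ; call through a constant-expression cast of the callee
      %r = call i32 bitcast (i8* (i8*)* @f to i32 (i32)*)(i32 %x)
      ret i32 %r
    }

Both the argument and return conversions are pointer <-> intptr, so the
transformed code may call @f directly, with an inttoptr of %x before the
call and a ptrtoint of the result after it.
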
@@ -8771,7 +8944,7 @@
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs &= ~ParamAttr::typeIncompatible(FT->getReturnType());
+ RAttrs &= ~ParamAttr::typeIncompatible(NewRetTy);
// Add the new return attributes.
if (RAttrs)
@@ -8785,7 +8958,7 @@
} else {
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
false, ParamTy, false);
- CastInst *NewCast = CastInst::create(opcode, *AI, ParamTy, "tmp");
+ CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp");
Args.push_back(InsertNewInstBefore(NewCast, *Caller));
}
@@ -8812,7 +8985,7 @@
// Must promote to pass through va_arg area!
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false,
PTy, false);
- Instruction *Cast = CastInst::create(opcode, *AI, PTy, "tmp");
+ Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp");
InsertNewInstBefore(Cast, *Caller);
Args.push_back(Cast);
} else {
@@ -8826,7 +8999,7 @@
}
}
- if (FT->getReturnType() == Type::VoidTy)
+ if (NewRetTy == Type::VoidTy)
Caller->setName(""); // Void type should not have a name.
const PAListPtr &NewCallerPAL = PAListPtr::get(attrVec.begin(),attrVec.end());
@@ -8834,7 +9007,8 @@
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(), Caller->getName(), Caller);
+ Args.begin(), Args.end(),
+ Caller->getName(), Caller);
cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NC)->setParamAttrs(NewCallerPAL);
} else {
@@ -8853,13 +9027,12 @@
if (NV->getType() != Type::VoidTy) {
Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
OldRetTy, false);
- NV = NC = CastInst::create(opcode, NC, OldRetTy, "tmp");
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
// If this is an invoke instruction, we should insert it after the first
// non-phi, instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- BasicBlock::iterator I = II->getNormalDest()->begin();
- while (isa<PHINode>(I)) ++I;
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call instr
@@ -8895,8 +9068,7 @@
IntrinsicInst *Tramp =
cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
- Function *NestF =
- cast<Function>(IntrinsicInst::StripPointerCasts(Tramp->getOperand(2)));
+ Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
@@ -9076,7 +9248,8 @@
Value *InRHS = FirstInst->getOperand(1);
PHINode *NewLHS = 0, *NewRHS = 0;
if (LHSVal == 0) {
- NewLHS = PHINode::Create(LHSType, FirstInst->getOperand(0)->getName()+".pn");
+ NewLHS = PHINode::Create(LHSType,
+ FirstInst->getOperand(0)->getName() + ".pn");
NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
InsertNewInstBefore(NewLHS, PN);
@@ -9084,7 +9257,8 @@
}
if (RHSVal == 0) {
- NewRHS = PHINode::Create(RHSType, FirstInst->getOperand(1)->getName()+".pn");
+ NewRHS = PHINode::Create(RHSType,
+ FirstInst->getOperand(1)->getName() + ".pn");
NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
InsertNewInstBefore(NewRHS, PN);
@@ -9104,9 +9278,9 @@
}
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::create(BinOp->getOpcode(), LHSVal, RHSVal);
+ return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
else if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
- return CmpInst::create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
RHSVal);
else {
assert(isa<GetElementPtrInst>(FirstInst));
@@ -9205,6 +9379,15 @@
LI->getParent() != PN.getIncomingBlock(i) ||
!isSafeToSinkLoad(LI))
return 0;
+
+ // If the load is volatile and its block has multiple successors, sinking
+ // it would remove a load of the volatile value from the path through the
+ // other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
@@ -9240,17 +9423,22 @@
// Insert and return the new operation.
if (CastInst* FirstCI = dyn_cast<CastInst>(FirstInst))
- return CastInst::create(FirstCI->getOpcode(), PhiVal, PN.getType());
- else if (isa<LoadInst>(FirstInst))
- return new LoadInst(PhiVal, "", isVolatile);
- else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::create(BinOp->getOpcode(), PhiVal, ConstantOp);
- else if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
- return CmpInst::create(CIOp->getOpcode(), CIOp->getPredicate(),
+ return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
- else
- assert(0 && "Unknown operation");
- return 0;
+ assert(isa<LoadInst>(FirstInst) && "Unknown operation");
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ return new LoadInst(PhiVal, "", isVolatile);
}
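
A hypothetical sketch of the volatile case (names invented; this assumes the
loads also satisfy isSafeToSinkLoad). Each predecessor has a single
successor, so the new restriction permits the merge; the input loads become
non-volatile and one volatile load of the merged pointer replaces them:

    define i32 @merge(i1 %c, i32* %p, i32* %q) {
    entry:
      br i1 %c, label %t, label %f
    t:
      %v1 = volatile load i32* %p
      br label %join
    f:
      %v2 = volatile load i32* %q
      br label %join
    join:
      ; becomes: %ptr = phi i32* [ %p, %t ], [ %q, %f ]
      ;          %r   = volatile load i32* %ptr
      %r = phi i32 [ %v1, %t ], [ %v2, %f ]
      ret i32 %r
    }
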
/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
@@ -9416,9 +9604,10 @@
bool MadeChange = false;
gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
+ for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
+ i != e; ++i, ++GTI) {
if (isa<SequentialType>(*GTI)) {
- if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
+ if (CastInst *CI = dyn_cast<CastInst>(*i)) {
if (CI->getOpcode() == Instruction::ZExt ||
CI->getOpcode() == Instruction::SExt) {
const Type *SrcTy = CI->getOperand(0)->getType();
@@ -9426,7 +9615,7 @@
// is a 32-bit pointer target.
if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
MadeChange = true;
- GEP.setOperand(i, CI->getOperand(0));
+ *i = CI->getOperand(0);
}
}
}
@@ -9434,15 +9623,15 @@
// to what we need. If the incoming value needs a cast instruction,
// insert it. This explicit cast can make subsequent optimizations more
// obvious.
- Value *Op = GEP.getOperand(i);
+ Value *Op = *i;
if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
if (Constant *C = dyn_cast<Constant>(Op)) {
- GEP.setOperand(i, ConstantExpr::getTrunc(C, TD->getIntPtrType()));
+ *i = ConstantExpr::getTrunc(C, TD->getIntPtrType());
MadeChange = true;
} else {
Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
GEP);
- GEP.setOperand(i, Op);
+ *i = Op;
MadeChange = true;
}
}
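
For instance (hypothetical IR, assuming a 32-bit target), the sign extension
below cannot affect a 32-bit address computation, so the wide index is
replaced by its narrower source; conversely, an index wider than the pointer
gets truncated to the intptr type, as the code above shows:

    define i32* @idx(i32* %base, i32 %i) {
    entry:
      %i.ext = sext i32 %i to i64
      %p = getelementptr i32* %base, i64 %i.ext
      ; becomes: %p = getelementptr i32* %base, i32 %i
      ret i32* %p
    }
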
@@ -9534,7 +9723,7 @@
if (isa<Constant>(SO1) && isa<Constant>(GO1))
Sum = ConstantExpr::getAdd(cast<Constant>(SO1), cast<Constant>(GO1));
else {
- Sum = BinaryOperator::createAdd(SO1, GO1, PtrOp->getName()+".sum");
+ Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
InsertNewInstBefore(cast<Instruction>(Sum), GEP);
}
}
@@ -9665,7 +9854,7 @@
if (Scale->getZExtValue() != 1) {
Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
false /*ZExt*/);
- Instruction *Sc = BinaryOperator::createMul(NewIdx, C, "idxscale");
+ Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale");
NewIdx = InsertNewInstBefore(Sc, GEP);
}
@@ -9789,8 +9978,8 @@
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
- const std::string &Str = CE->getOperand(0)->getStringValue();
- if (!Str.empty()) {
+ std::string Str;
+ if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) {
unsigned len = Str.length();
const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned numBits = Ty->getPrimitiveSizeInBits();
@@ -9882,10 +10071,16 @@
while (BBI != E) {
--BBI;
+ // If we see a free or a call (which might do a free) the pointer could be
+ // marked invalid.
+ if (isa<FreeInst>(BBI) || isa<CallInst>(BBI))
+ return false;
+
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
if (LI->getOperand(0) == V) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
if (SI->getOperand(1) == V) return true;
+ }
}
return false;
@@ -10099,7 +10294,7 @@
NewCast = ConstantExpr::getCast(opcode, C, CastDstTy);
else
NewCast = IC.InsertNewInstBefore(
- CastInst::create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"),
+ CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"),
SI);
return new StoreInst(NewCast, CastOp);
}
@@ -10120,7 +10315,7 @@
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
- if (Ptr->hasOneUse()) {
+ if (Ptr->hasOneUse() && !SI.isVolatile()) {
if (isa<AllocaInst>(Ptr)) {
EraseInstFromFunction(SI);
++NumCombined;
@@ -10177,7 +10372,7 @@
}
// Don't skip over loads or things that can modify memory.
- if (BBI->mayWriteToMemory())
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
}
@@ -10257,8 +10452,12 @@
}
if (++PI != pred_end(DestBB))
return false;
-
-
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
// Verify that the other block ends in a branch and is not otherwise empty.
BasicBlock::iterator BBI = OtherBB->getTerminator();
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
@@ -10291,18 +10490,19 @@
return false;
break;
}
- // If we find something that may be using the stored value, or if we run
- // out of instructions, we can't do the xform.
- if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
BBI == OtherBB->begin())
return false;
}
// In order to eliminate the store in OtherBr, we have to
- // make sure nothing reads the stored value in StoreBB.
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
// FIXME: This should really be AA driven.
- if (isa<LoadInst>(I) || I->mayWriteToMemory())
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
return false;
}
}
@@ -10319,8 +10519,7 @@
// Advance to a place where it is safe to insert the new store and
// insert it.
- BBI = DestBB->begin();
- while (isa<PHINode>(BBI)) ++BBI;
+ BBI = DestBB->getFirstNonPHI();
InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
OtherStore->isVolatile()), *BBI);
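
A hypothetical sketch of the diamond case this code handles (names
invented): both conditional stores are deleted in favor of a phi of the
stored values plus a single store at the first non-PHI position of the join
block:

    define void @merge_stores(i1 %c, i32* %p) {
    entry:
      br i1 %c, label %t, label %f
    t:
      store i32 1, i32* %p
      br label %join
    f:
      store i32 2, i32* %p
      br label %join
    join:
      ; becomes: %v = phi i32 [ 1, %t ], [ 2, %f ]
      ;          store i32 %v, i32* %p
      ret void
    }
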
@@ -10407,6 +10606,16 @@
return 0;
}
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ // See if we are trying to extract a known value. If so, use that instead.
+ if (Value *Elt = FindInsertedValue(EV.getOperand(0), EV.idx_begin(),
+ EV.idx_end(), &EV))
+ return ReplaceInstUsesWith(EV, Elt);
+
+ // No changes
+ return 0;
+}
+
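
A minimal hypothetical example of what FindInsertedValue catches here: the
extractvalue reads back exactly the element the insertvalue wrote, so the
pair folds away and %x is used directly:

    define i32 @roundtrip(i32 %x) {
    entry:
      %agg = insertvalue { i32, i32 } undef, i32 %x, 0
      %v = extractvalue { i32, i32 } %agg, 0   ; replaced by %x
      ret i32 %v
    }
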
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation.
static bool CheapToScalarize(Value *V, bool isConstant) {
@@ -10458,11 +10667,11 @@
std::vector<unsigned> Result;
const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- if (isa<UndefValue>(CP->getOperand(i)))
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
Result.push_back(NElts*2); // undef -> 8
else
- Result.push_back(cast<ConstantInt>(CP->getOperand(i))->getZExtValue());
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
return Result;
}
@@ -10511,7 +10720,6 @@
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-
// If vector val is undef, replace extract with scalar undef.
if (isa<UndefValue>(EI.getOperand(0)))
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
@@ -10521,8 +10729,9 @@
return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
- // If vector val is constant with uniform operands, replace EI
- // with that operand
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
Constant *op0 = C->getOperand(0);
for (unsigned i = 1; i < C->getNumOperands(); ++i)
if (C->getOperand(i) != op0) {
@@ -10588,15 +10797,15 @@
EI.getName()+".rhs");
InsertNewInstBefore(newEI0, EI);
InsertNewInstBefore(newEI1, EI);
- return BinaryOperator::create(BO->getOpcode(), newEI0, newEI1);
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
}
} else if (isa<LoadInst>(I)) {
unsigned AS =
cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
Value *Ptr = InsertBitCastBefore(I->getOperand(0),
PointerType::get(EI.getType(), AS),EI);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName() + ".gep");
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep");
InsertNewInstBefore(GEP, EI);
return new LoadInst(GEP);
}
@@ -10971,7 +11180,8 @@
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I) || I->mayWriteToMemory()) return false;
+ if (isa<PHINode>(I) || I->mayWriteToMemory() || isa<TerminatorInst>(I))
+ return false;
// Do not sink alloca instructions out of the entry block.
if (isa<AllocaInst>(I) && I->getParent() ==
@@ -10980,15 +11190,14 @@
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- for (BasicBlock::iterator Scan = LI, E = LI->getParent()->end();
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
- BasicBlock::iterator InsertPos = DestBlock->begin();
- while (isa<PHINode>(InsertPos)) ++InsertPos;
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
I->moveBefore(InsertPos);
++NumSunkInst;
@@ -11044,15 +11253,12 @@
// Recursively visit successors. If this is a branch or switch on a
// constant, only visit the reachable successor.
- if (BB->getUnwindDest())
- Worklist.push_back(BB->getUnwindDest());
TerminatorInst *TI = BB->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
- if (ReachableBB != BB->getUnwindDest())
- Worklist.push_back(ReachableBB);
+ Worklist.push_back(ReachableBB);
continue;
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -11061,8 +11267,7 @@
for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
if (SI->getCaseValue(i) == Cond) {
BasicBlock *ReachableBB = SI->getSuccessor(i);
- if (ReachableBB != BB->getUnwindDest())
- Worklist.push_back(ReachableBB);
+ Worklist.push_back(ReachableBB);
continue;
}
@@ -11142,8 +11347,20 @@
continue;
}
+ if (TD && I->getType()->getTypeID() == Type::VoidTyID) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i)) {
+ if (Constant *NewC = ConstantFoldConstantExpression(CE, TD))
+ i->set(NewC);
+ }
+ }
+ }
+
// See if we can trivially sink this instruction to a successor basic block.
- if (I->hasOneUse()) {
+ // FIXME: Remove GetResultInst test when first class support for aggregates
+ // is implemented.
+ if (I->hasOneUse() && !isa<GetResultInst>(I)) {
BasicBlock *BB = I->getParent();
BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
if (UserParent != BB) {
@@ -11248,7 +11465,7 @@
// Iterate while there is work to do.
unsigned Iteration = 0;
- while (DoOneIteration(F, Iteration++))
+ while (DoOneIteration(F, Iteration++))
EverMadeChange = true;
return EverMadeChange;
}
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/JumpThreading.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/JumpThreading.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/JumpThreading.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/JumpThreading.cpp Sun Jul 6 15:45:41 2008
@@ -17,6 +17,7 @@
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -56,11 +57,18 @@
bool runOnFunction(Function &F);
bool ThreadBlock(BasicBlock *BB);
void ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+ BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+
+ bool ProcessJumpOnPHI(PHINode *PN);
+ bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd);
+ bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB);
};
- char JumpThreading::ID = 0;
- RegisterPass<JumpThreading> X("jump-threading", "Jump Threading");
}
+char JumpThreading::ID = 0;
+static RegisterPass<JumpThreading>
+X("jump-threading", "Jump Threading");
+
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -82,12 +90,34 @@
return EverChanged;
}
+/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
+/// value for the PHI, factor them together so we get one block to thread for
+/// the whole group.
+/// This is important for things like "phi i1 [true, true, false, true, x]"
+/// where we only need to clone the block for the true blocks once.
+///
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+ SmallVector<BasicBlock*, 16> CommonPreds;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == CstVal)
+ CommonPreds.push_back(PN->getIncomingBlock(i));
+
+ if (CommonPreds.size() == 1)
+ return CommonPreds[0];
+
+ DOUT << " Factoring out " << CommonPreds.size()
+ << " common predecessors.\n";
+ return SplitBlockPredecessors(PN->getParent(),
+ &CommonPreds[0], CommonPreds.size(),
+ ".thr_comm", this);
+}
+
+
/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
/// thread across it.
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
- BasicBlock::const_iterator I = BB->begin();
/// Ignore PHI nodes, these will be flattened when duplication happens.
- while (isa<PHINode>(*I)) ++I;
+ BasicBlock::const_iterator I = BB->getFirstNonPHI();
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
@@ -127,7 +157,7 @@
/// ThreadBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ThreadBlock(BasicBlock *BB) {
- // See if this block ends with a branch of switch. If so, see if the
+ // See if this block ends with a branch or switch. If so, see if the
// condition is a phi node. If so, and if an entry of the phi node is a
// constant, we can thread the block.
Value *Condition;
@@ -141,7 +171,7 @@
return false; // Must be an invoke.
// If the terminator of this block is branching on a constant, simplify the
- // terminator to an unconditional branch. This can occur do to threading in
+ // terminator to an unconditional branch. This can occur due to threading in
// other blocks.
if (isa<ConstantInt>(Condition)) {
DOUT << " In block '" << BB->getNameStart()
@@ -157,25 +187,50 @@
// See if this is a phi node in the current block.
PHINode *PN = dyn_cast<PHINode>(Condition);
- if (!PN || PN->getParent() != BB) return false;
+ if (PN && PN->getParent() == BB)
+ return ProcessJumpOnPHI(PN);
+
+ // If this is a conditional branch whose condition is and/or of a phi, try to
+ // simplify it.
+ if (BinaryOperator *CondI = dyn_cast<BinaryOperator>(Condition)) {
+ if ((CondI->getOpcode() == Instruction::And ||
+ CondI->getOpcode() == Instruction::Or) &&
+ isa<BranchInst>(BB->getTerminator()) &&
+ ProcessBranchOnLogical(CondI, BB,
+ CondI->getOpcode() == Instruction::And))
+ return true;
+ }
+
+ // If we have "br (phi != 42)" and the phi node has any constant values as
+ // operands, we can thread through this block.
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(Condition))
+ if (isa<PHINode>(CondCmp->getOperand(0)) &&
+ isa<Constant>(CondCmp->getOperand(1)) &&
+ ProcessBranchOnCompare(CondCmp, BB))
+ return true;
+ return false;
+}
+
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
+/// the current block. See if there are any simplifications we can do based on
+/// inputs to the phi node.
+///
+bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
// See if the phi node has any constant values. If so, we can determine where
// the corresponding predecessor will branch.
- unsigned PredNo = ~0U;
ConstantInt *PredCst = 0;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i)))) {
- PredNo = i;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i))))
break;
- }
- }
// If no incoming value has a constant, we don't know the destination of any
// predecessors.
- if (PredNo == ~0U)
+ if (PredCst == 0)
return false;
// See if the cost of duplicating this block is low enough.
+ BasicBlock *BB = PN->getParent();
unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
if (JumpThreadCost > Threshold) {
DOUT << " Not threading BB '" << BB->getNameStart()
@@ -183,9 +238,11 @@
return false;
}
- // If so, we can actually do this threading. Figure out which predecessor and
- // which successor we are threading for.
- BasicBlock *PredBB = PN->getIncomingBlock(PredNo);
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, figure out which successor we are threading to.
BasicBlock *SuccBB;
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse());
@@ -194,21 +251,180 @@
SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst));
}
- // TODO: If there are multiple preds with the same incoming value for the PHI,
- // factor them together so we get one thread block for the whole group. This
- // is important for things like "phi i1 [true, true, false, true, x]" where
- // we only need to clone the block for the true blocks once.
+ // If threading to the same block as we come from, we would infinite loop.
+ if (SuccBB == BB) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - would thread to self!\n";
+ return false;
+ }
+ // And finally, do it!
DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '"
<< SuccBB->getNameStart() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
- << *BB;
+ << *BB << "\n";
ThreadEdge(BB, PredBB, SuccBB);
++NumThreads;
return true;
}
+/// ProcessBranchOnLogical - PN's basic block contains a conditional branch
+/// whose condition is an AND/OR where one side is PN. If PN has constant
+/// operands that permit us to evaluate the condition for some operand, thread
+/// through the block. For example with:
+/// br (and X, phi(Y, Z, false))
+/// the predecessor corresponding to the 'false' will always jump to the false
+/// destination of the branch.
+///
+bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
+ bool isAnd) {
+ // If this is a binary operator tree of the same AND/OR opcode, check the
+ // LHS/RHS.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if ((isAnd && BO->getOpcode() == Instruction::And) ||
+ (!isAnd && BO->getOpcode() == Instruction::Or)) {
+ if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd))
+ return true;
+ if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd))
+ return true;
+ }
+
+ // If this isn't a PHI node, we can't handle it.
+ PHINode *PN = dyn_cast<PHINode>(V);
+ if (!PN || PN->getParent() != BB) return false;
+
+ // We can only do the simplification for phi nodes of 'false' with AND or
+ // 'true' with OR. See if we have any entries in the phi for this.
+ unsigned PredNo = ~0U;
+ ConstantInt *PredCst = ConstantInt::get(Type::Int1Ty, !isAnd);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (PN->getIncomingValue(i) == PredCst) {
+ PredNo = i;
+ break;
+ }
+ }
+
+ // If no match, bail out.
+ if (PredNo == ~0U)
+ return false;
+
+ // See if the cost of duplicating this block is low enough.
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - Cost is too high: " << JumpThreadCost << "\n";
+ return false;
+ }
+
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, figure out which successor we are threading to. If this was an AND,
+ // the constant must be FALSE, and we must be targeting the 'false' block.
+ // If this is an OR, the constant must be TRUE, and we must be targeting the
+ // 'true' block.
+ BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
+
+ // If threading to the same block as we come from, we would infinite loop.
+ if (SuccBB == BB) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - would thread to self!\n";
+ return false;
+ }
+
+ // And finally, do it!
+ DOUT << " Threading edge through bool from '" << PredBB->getNameStart()
+ << "' to '" << SuccBB->getNameStart() << "' with cost: "
+ << JumpThreadCost << ", across block:\n "
+ << *BB << "\n";
+
+ ThreadEdge(BB, PredBB, SuccBB);
+ ++NumThreads;
+ return true;
+}
+
+/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
+/// node and a constant. If the PHI node contains any constants as inputs, we
+/// can fold the compare for that edge and thread through it.
+bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
+ PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
+ Constant *RHS = cast<Constant>(Cmp->getOperand(1));
+
+ // If the phi isn't in the current block, an incoming edge to this block
+ // doesn't control the destination.
+ if (PN->getParent() != BB)
+ return false;
+
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ Constant *PredCst = 0;
+ bool TrueDirection = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ PredCst = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (PredCst == 0) continue;
+
+ Constant *Res;
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp))
+ Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS);
+ else
+ Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(),
+ PredCst, RHS);
+ // If this folded to a constant, we know which way the branch goes.
+ if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
+ TrueDirection = ResC->getZExtValue();
+ break;
+ }
+ // If this folded to undef, just go the false way.
+ if (isa<UndefValue>(Res)) {
+ TrueDirection = false;
+ break;
+ }
+
+ // Otherwise, we can't fold this input.
+ PredCst = 0;
+ }
+
+ // If no match, bail out.
+ if (PredCst == 0)
+ return false;
+
+ // See if the cost of duplicating this block is low enough.
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ if (JumpThreadCost > Threshold) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - Cost is too high: " << JumpThreadCost << "\n";
+ return false;
+ }
+
+ // If so, we can actually do this threading. Merge any common predecessors
+ // that will act the same.
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+
+ // Next, get our successor.
+ BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
+
+ // If threading to the same block as we come from, we would infinite loop.
+ if (SuccBB == BB) {
+ DOUT << " Not threading BB '" << BB->getNameStart()
+ << "' - would thread to self!\n";
+ return false;
+ }
+
+ // And finally, do it!
+ DOUT << " Threading edge through bool from '" << PredBB->getNameStart()
+ << "' to '" << SuccBB->getNameStart() << "' with cost: "
+ << JumpThreadCost << ", across block:\n "
+ << *BB << "\n";
+
+ ThreadEdge(BB, PredBB, SuccBB);
+ ++NumThreads;
+ return true;
+}
+
+
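
A hypothetical sketch of the simplest PHI case (names invented): the value
flowing in from %a is a known constant, so the %a -> %cond edge can be
threaded straight to %yes, duplicating %cond's non-PHI instructions (here,
none) along the way:

    define i32 @thread(i1 %c) {
    entry:
      br i1 %c, label %a, label %b
    a:
      br label %cond
    b:
      br label %cond
    cond:
      %p = phi i1 [ true, %a ], [ %c, %b ]
      br i1 %p, label %yes, label %no
    yes:
      ret i32 1
    no:
      ret i32 0
    }

With FactorCommonPHIPreds, several predecessors contributing the same
constant are first funneled through one split block, so the duplication cost
is paid once for the whole group.
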
/// ThreadEdge - We have decided that it is safe and profitable to thread an
/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
/// change.
@@ -218,12 +434,27 @@
// Jump Threading can not update SSA properties correctly if the values
// defined in the duplicated block are used outside of the block itself. For
// this reason, we spill all values that are used outside of BB to the stack.
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
- if (I->isUsedOutsideOfBlock(BB)) {
- // We found a use of I outside of BB. Create a new stack slot to
- // break this inter-block usage pattern.
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ if (!I->isUsedOutsideOfBlock(BB))
+ continue;
+
+ // We found a use of I outside of BB. Create a new stack slot to
+ // break this inter-block usage pattern.
+ if (!isa<StructType>(I->getType())) {
DemoteRegToStack(*I);
+ continue;
}
+
+ // Alternatively, I must be a call or invoke that returns multiple retvals.
+ // We can't use 'DemoteRegToStack' because that will create loads and
+ // stores of aggregates which is not valid yet. If I is a call, we can just
+ // pull all the getresult instructions up to this block. If I is an invoke,
+ // we are out of luck.
+ BasicBlock::iterator IP = I; ++IP;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI)
+ cast<GetResultInst>(UI)->moveBefore(IP);
+ }
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/LICM.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/LICM.cpp Sun Jul 6 15:45:41 2008
@@ -58,11 +58,11 @@
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted , "Number of memory locations promoted to registers");
-namespace {
- cl::opt<bool>
- DisablePromotion("disable-licm-promotion", cl::Hidden,
- cl::desc("Disable memory promotion in LICM pass"));
+static cl::opt<bool>
+DisablePromotion("disable-licm-promotion", cl::Hidden,
+ cl::desc("Disable memory promotion in LICM pass"));
+namespace {
struct VISIBILITY_HIDDEN LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LICM() : LoopPass((intptr_t)&ID) {}
@@ -216,11 +216,11 @@
std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
std::map<Value*, AllocaInst*> &Val2AlMap);
};
-
- char LICM::ID = 0;
- RegisterPass<LICM> X("licm", "Loop Invariant Code Motion");
}
+char LICM::ID = 0;
+static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion");
+
LoopPass *llvm::createLICMPass() { return new LICM(); }
/// Hoist expressions out of the specified loop. Note, alias info for inner
@@ -258,10 +258,12 @@
// Because subloops have already been incorporated into AST, we skip blocks in
// subloops.
//
- for (std::vector<BasicBlock*>::const_iterator I = L->getBlocks().begin(),
- E = L->getBlocks().end(); I != E; ++I)
- if (LI->getLoopFor(*I) == L) // Ignore blocks in subloops...
- CurAST->add(**I); // Incorporate the specified basic block
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops...
+ CurAST->add(*BB); // Incorporate the specified basic block
+ }
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
@@ -472,8 +474,7 @@
// nodes in it.
I.removeFromParent();
- BasicBlock::iterator InsertPt = ExitBlocks[0]->begin();
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+ BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
ExitBlocks[0]->getInstList().insert(InsertPt, &I);
}
} else if (ExitBlocks.empty()) {
@@ -542,8 +543,7 @@
// If we haven't already processed this exit block, do so now.
if (InsertedBlocks.insert(ExitBlock).second) {
// Insert the code after the last PHI node...
- BasicBlock::iterator InsertPt = ExitBlock->begin();
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
// If this is the first exit block processed, just move the original
// instruction, otherwise clone the original instruction and insert
@@ -700,12 +700,11 @@
// Scan the basic blocks in the loop, replacing uses of our pointers with
// uses of the allocas in question.
//
- const std::vector<BasicBlock*> &LoopBBs = CurLoop->getBlocks();
- for (std::vector<BasicBlock*>::const_iterator I = LoopBBs.begin(),
- E = LoopBBs.end(); I != E; ++I) {
+ for (Loop::block_iterator I = CurLoop->block_begin(),
+ E = CurLoop->block_end(); I != E; ++I) {
+ BasicBlock *BB = *I;
// Rewrite all loads and stores in the block of the pointer...
- for (BasicBlock::iterator II = (*I)->begin(), E = (*I)->end();
- II != E; ++II) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
if (LoadInst *L = dyn_cast<LoadInst>(II)) {
std::map<Value*, AllocaInst*>::iterator
I = ValueToAllocaMap.find(L->getOperand(0));
@@ -726,30 +725,30 @@
// want to insert one copy of the code in each exit block, though the loop may
// exit to the same block more than once.
//
- std::set<BasicBlock*> ProcessedBlocks;
+ SmallPtrSet<BasicBlock*, 16> ProcessedBlocks;
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ProcessedBlocks.insert(ExitBlocks[i]).second) {
- // Copy all of the allocas into their memory locations.
- BasicBlock::iterator BI = ExitBlocks[i]->begin();
- while (isa<PHINode>(*BI))
- ++BI; // Skip over all of the phi nodes in the block.
- Instruction *InsertPos = BI;
- unsigned PVN = 0;
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- // Load from the alloca.
- LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
-
- // If this is a pointer type, update alias info appropriately.
- if (isa<PointerType>(LI->getType()))
- CurAST->copyValue(PointerValueNumbers[PVN++], LI);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ if (!ProcessedBlocks.insert(ExitBlocks[i]))
+ continue;
+
+ // Copy all of the allocas into their memory locations.
+ BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI();
+ Instruction *InsertPos = BI;
+ unsigned PVN = 0;
+ for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
+ // Load from the alloca.
+ LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
+
+ // If this is a pointer type, update alias info appropriately.
+ if (isa<PointerType>(LI->getType()))
+ CurAST->copyValue(PointerValueNumbers[PVN++], LI);
- // Store into the memory we promoted.
- new StoreInst(LI, PromotedValues[i].second, InsertPos);
- }
+ // Store into the memory we promoted.
+ new StoreInst(LI, PromotedValues[i].second, InsertPos);
}
+ }
// Now that we have done the deed, use the mem2reg functionality to promote
// all of the new allocas we just created into real SSA registers.
@@ -771,14 +770,8 @@
std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
- SmallVector<Instruction *, 4> LoopExits;
- SmallVector<BasicBlock *, 4> Blocks;
- CurLoop->getExitingBlocks(Blocks);
- for (SmallVector<BasicBlock *, 4>::iterator BI = Blocks.begin(),
- BE = Blocks.end(); BI != BE; ++BI) {
- BasicBlock *BB = *BI;
- LoopExits.push_back(BB->getTerminator());
- }
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ CurLoop->getExitingBlocks(ExitingBlocks);
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
@@ -787,72 +780,76 @@
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
- if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias() &&
- !AS.isVolatile() && CurLoop->isLoopInvariant(AS.begin()->first)) {
- assert(!AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
- Value *V = AS.begin()->first;
-
- // Check that all of the pointers in the alias set have the same type. We
- // cannot (yet) promote a memory location that is loaded and stored in
- // different sizes.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->first))
+ continue;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *V = AS.begin()->first;
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ {
bool PointerOk = true;
for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
if (V->getType() != I->first->getType()) {
PointerOk = false;
break;
}
+ if (!PointerOk)
+ continue;
+ }
- // If one use of value V inside the loop is safe then it is OK to promote
- // this value. On the otherside if there is not any unsafe use inside the
- // loop then also it is OK to promote this value. Otherwise it is
- // unsafe to promote this value.
- if (PointerOk) {
- bool oneSafeUse = false;
- bool oneUnsafeUse = false;
- for(Value::use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- Instruction *Use = dyn_cast<Instruction>(*UI);
- if (!Use || !CurLoop->contains(Use->getParent()))
- continue;
- for (SmallVector<Instruction *, 4>::iterator
- ExitI = LoopExits.begin(), ExitE = LoopExits.end();
- ExitI != ExitE; ++ExitI) {
- Instruction *Ex = *ExitI;
- if (!isa<PHINode>(Use) && DT->dominates(Use, Ex)) {
- oneSafeUse = true;
- break;
- }
- else
- oneUnsafeUse = true;
- }
-
- if (oneSafeUse)
- break;
- }
-
- if (oneSafeUse)
- PointerOk = true;
- else if (!oneUnsafeUse)
- PointerOk = true;
- else
- PointerOk = false;
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
+ //
+ // for () { if (c) *P += 1; }
+ //
+ // into:
+ //
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+ bool InvalidInst = false;
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ // Ignore instructions not in this loop.
+ Instruction *Use = dyn_cast<Instruction>(*UI);
+ if (!Use || !CurLoop->contains(Use->getParent()))
+ continue;
+
+ if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
+ InvalidInst = true;
+ break;
}
- if (PointerOk) {
- const Type *Ty = cast<PointerType>(V->getType())->getElementType();
- AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
- PromotedValues.push_back(std::make_pair(AI, V));
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+ }
- // Update the AST and alias analysis.
- CurAST->copyValue(V, AI);
+ // If there is a non-load/store instruction in the loop, we can't promote
+ // it. If there isn't a guaranteed-to-execute instruction, we can't
+ // promote.
+ if (InvalidInst || !GuaranteedToExecute)
+ continue;
+
+ const Type *Ty = cast<PointerType>(V->getType())->getElementType();
+ AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
+ PromotedValues.push_back(std::make_pair(AI, V));
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- ValueToAllocaMap.insert(std::make_pair(I->first, AI));
+ // Update the AST and alias analysis.
+ CurAST->copyValue(V, AI);
- DOUT << "LICM: Promoting value: " << *V << "\n";
- }
- }
+ for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
+ ValueToAllocaMap.insert(std::make_pair(I->first, AI));
+
+ DOUT << "LICM: Promoting value: " << *V << "\n";
}
}
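
A hypothetical sketch of a pattern the rewritten check accepts (names
invented): the store to @g is unconditional in the loop body, so it is
guaranteed to execute, and the location can be kept in a temporary alloca
(promoted to a register by mem2reg afterwards) with the value written back
on loop exit:

    @g = global i32 0

    define void @count(i32 %n) {
    entry:
      br label %loop
    loop:
      %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
      %v = load i32* @g
      %v.next = add i32 %v, 1
      store i32 %v.next, i32* @g   ; executes on every iteration
      %i.next = add i32 %i, 1
      %done = icmp eq i32 %i.next, %n
      br i1 %done, label %exit, label %loop
    exit:
      ret void
    }

Had the store been guarded by a branch inside the loop, GuaranteedToExecute
would stay false and the promotion would be rejected, exactly as the
*P += 1 example in the comment explains.
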
Added: llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopDeletion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopDeletion.cpp?rev=53163&view=auto
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopDeletion.cpp (added)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopDeletion.cpp Sun Jul 6 15:45:41 2008
@@ -0,0 +1,280 @@
+//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dead Loop Deletion Pass. This pass is responsible
+// for eliminating loops with non-infinite computable trip counts that have no
+// side effects or volatile instructions, and do not contribute to the
+// computation of the function's return value.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-delete"
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace llvm;
+
+STATISTIC(NumDeleted, "Number of loops deleted");
+
+namespace {
+ class VISIBILITY_HIDDEN LoopDeletion : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDeletion() : LoopPass((intptr_t)&ID) { }
+
+ // Possibly eliminate loop L if it is dead.
+ bool runOnLoop(Loop* L, LPPassManager& LPM);
+
+ bool SingleDominatingExit(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks);
+ bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks);
+ bool IsLoopInvariantInst(Instruction *I, Loop* L);
+
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ }
+ };
+}
+
+char LoopDeletion::ID = 0;
+static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops");
+
+LoopPass* llvm::createLoopDeletionPass() {
+ return new LoopDeletion();
+}
+
+/// SingleDominatingExit - Checks that there is only a single block that
+/// branches out of the loop, and that it also dominates the latch block. Loops
+/// with multiple or non-latch-dominating exiting blocks could be dead, but we'd
+/// have to do more extensive analysis to make sure, for instance, that the
+/// control flow logic involved was or could be made loop-invariant.
+bool LoopDeletion::SingleDominatingExit(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks) {
+
+ if (exitingBlocks.size() != 1)
+ return false;
+
+ BasicBlock* latch = L->getLoopLatch();
+ if (!latch)
+ return false;
+
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ return DT.dominates(exitingBlocks[0], latch);
+}
+
+/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to
+/// a loop, which is defined as being true if all of its operands are defined
+/// outside of the loop. These instructions can be hoisted out of the loop
+/// if their results are needed. This could be made more aggressive by
+/// recursively checking the operands for invariance, but it's not clear that
+/// it's worth it.
+bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L) {
+ // PHI nodes are not loop invariant if defined in the loop.
+ if (isa<PHINode>(I) && L->contains(I->getParent()))
+ return false;
+
+ // The instruction is loop invariant if all of its operands are loop-invariant
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!L->isLoopInvariant(I->getOperand(i)))
+ return false;
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+/// IsLoopDead - Determines if a loop is dead. This assumes that we've already
+/// checked for unique exit and exiting blocks, and that the code is in LCSSA
+/// form.
+bool LoopDeletion::IsLoopDead(Loop* L,
+ SmallVector<BasicBlock*, 4>& exitingBlocks,
+ SmallVector<BasicBlock*, 4>& exitBlocks) {
+ BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock* exitBlock = exitBlocks[0];
+
+ // Make sure that all PHI entries coming from the loop are loop invariant.
+ // Because the code is in LCSSA form, any values used outside of the loop
+ // must pass through a PHI in the exit block, meaning that this check is
+ // sufficient to guarantee that no loop-variant values are used outside
+ // of the loop.
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ Value* incoming = P->getIncomingValueForBlock(exitingBlock);
+ if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (!IsLoopInvariantInst(I, L))
+ return false;
+
+ BI++;
+ }
+
+ // Make sure that no instructions in the block have potential side-effects.
+ // This includes instructions that could write to memory, and loads that are
+ // marked volatile. This could be made more aggressive by using aliasing
+ // information to identify readonly and readnone calls.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ++BI) {
+ if (BI->mayWriteToMemory())
+ return false;
+ else if (LoadInst* L = dyn_cast<LoadInst>(BI))
+ if (L->isVolatile())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the
+/// observable behavior of the program other than finite running time. Note
+/// we do ensure that this never removes a loop that might be infinite, as doing
+/// so could change the halting/non-halting nature of a program.
+/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
+/// in order to make various safety checks work.
+bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+ // We can only remove the loop if there is a preheader that we can
+ // branch from after removing it.
+ BasicBlock* preheader = L->getLoopPreheader();
+ if (!preheader)
+ return false;
+
+ // We can't remove loops that contain subloops. If the subloops were dead,
+ // they would already have been removed in earlier executions of this pass.
+ if (L->begin() != L->end())
+ return false;
+
+ SmallVector<BasicBlock*, 4> exitingBlocks;
+ L->getExitingBlocks(exitingBlocks);
+
+ SmallVector<BasicBlock*, 4> exitBlocks;
+ L->getUniqueExitBlocks(exitBlocks);
+
+ // We require that the loop only have a single exit block. Otherwise, we'd
+ // be in the situation of needing to be able to solve statically which exit
+ // block will be branched to, or trying to preserve the branching logic in
+ // a loop invariant manner.
+ if (exitBlocks.size() != 1)
+ return false;
+
+ // Loops with multiple exits or exits that don't dominate the latch
+ // are too complicated to handle correctly.
+ if (!SingleDominatingExit(L, exitingBlocks))
+ return false;
+
+ // Finally, we have to check that the loop really is dead.
+ if (!IsLoopDead(L, exitingBlocks, exitBlocks))
+ return false;
+
+ // Don't remove loops for which we can't solve the trip count.
+ // They could be infinite, in which case we'd be changing program behavior.
+ ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ SCEVHandle S = SE.getIterationCount(L);
+ if (isa<SCEVCouldNotCompute>(S))
+ return false;
+
+ // Now that we know the removal is safe, remove the loop by changing the
+ // branch from the preheader to go to the single exit block.
+ BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock* exitingBlock = exitingBlocks[0];
+
+ // Because we're deleting a large chunk of code at once, the sequence in which
+ // we remove things is very important to avoid invalidation issues. Don't
+ // mess with this unless you have good reason and know what you're doing.
+
+ // Move simple loop-invariant expressions out of the loop, since they
+ // might be needed by the exit phis.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ) {
+ Instruction* I = BI++;
+ if (!I->use_empty() && IsLoopInvariantInst(I, L))
+ I->moveBefore(preheader->getTerminator());
+ }
+
+ // Connect the preheader directly to the exit block.
+ TerminatorInst* TI = preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+
+ // Rewrite phis in the exit block to get their inputs from
+ // the preheader instead of the exiting block.
+ BasicBlock::iterator BI = exitBlock->begin();
+ while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ P->replaceUsesOfWith(exitingBlock, preheader);
+ BI++;
+ }
+
+ // Update the dominator tree and remove the instructions and blocks that will
+ // be deleted from the reference counting scheme.
+ DominatorTree& DT = getAnalysis<DominatorTree>();
+ SmallPtrSet<DomTreeNode*, 8> ChildNodes;
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI) {
+ // Move all of the block's children to be children of the preheader, which
+ // allows us to remove the domtree entry for the block.
+ ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ DE = ChildNodes.end(); DI != DE; ++DI)
+ DT.changeImmediateDominator(*DI, DT[preheader]);
+
+ ChildNodes.clear();
+ DT.eraseNode(*LI);
+
+ // Remove instructions that we're deleting from ScalarEvolution.
+ for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end();
+ BI != BE; ++BI)
+ SE.deleteValueFromRecords(BI);
+
+ SE.deleteValueFromRecords(*LI);
+
+ // Remove the block from the reference counting scheme, so that we can
+ // delete it freely later.
+ (*LI)->dropAllReferences();
+ }
+
+ // Erase the instructions and the blocks without having to worry
+ // about ordering because we already dropped the references.
+ // NOTE: This iteration is safe because erasing the block does not remove its
+ // entry from the loop's block list. We do that in the next section.
+ for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
+ LI != LE; ++LI)
+ (*LI)->eraseFromParent();
+
+ // Finally, remove the blocks from loopinfo. This has to happen late because
+ // otherwise our loop iterators won't work.
+ LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ SmallPtrSet<BasicBlock*, 8> blocks;
+ blocks.insert(L->block_begin(), L->block_end());
+ for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
+ E = blocks.end(); I != E; ++I)
+ loopInfo.removeBlock(*I);
+
+ // The last step is to inform the loop pass manager that we've
+ // eliminated this loop.
+ LPM.deleteLoopFromQueue(L);
+
+ NumDeleted++;
+
+ return true;
+}
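
A hypothetical example of a loop the new pass deletes (names invented):
nothing computed inside is used afterwards, there are no side effects or
volatile accesses, and ScalarEvolution can compute the trip count, so the
preheader branch is rewired directly to %exit and the body is erased:

    define i32 @dead() {
    entry:
      br label %loop
    loop:
      %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
      %i.next = add i32 %i, 1
      %done = icmp eq i32 %i.next, 100
      br i1 %done, label %exit, label %loop
    exit:
      ret i32 0
    }
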
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopIndexSplit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopIndexSplit.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopIndexSplit.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopIndexSplit.cpp Sun Jul 6 15:45:41 2008
@@ -195,11 +195,12 @@
// Induction variable's final loop exit value operand number in exit condition.
unsigned ExitValueNum;
};
-
- char LoopIndexSplit::ID = 0;
- RegisterPass<LoopIndexSplit> X ("loop-index-split", "Index Split Loops");
}
+char LoopIndexSplit::ID = 0;
+static RegisterPass<LoopIndexSplit>
+X("loop-index-split", "Index Split Loops");
+
LoopPass *llvm::createLoopIndexSplitPass() {
return new LoopIndexSplit();
}
@@ -580,7 +581,7 @@
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
SD.SplitValue, ExitValue, "lisplit",
Terminator);
- Instruction *NSplitCond = BinaryOperator::createAnd(C1, C2, "lisplit",
+ Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit",
Terminator);
SD.SplitCondition->replaceAllUsesWith(NSplitCond);
SD.SplitCondition->eraseFromParent();
@@ -595,11 +596,27 @@
if (isa<PHINode>(I) || I == LTerminator)
continue;
- if (I == IndVarIncrement)
- I->replaceAllUsesWith(ExitValue);
- else
+ if (I == IndVarIncrement) {
+ // Replace induction variable increment if it is not used outside
+ // the loop.
+ bool UsedOutsideLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ if (Instruction *Use = dyn_cast<Instruction>(UI))
+ if (!L->contains(Use->getParent())) {
+ UsedOutsideLoop = true;
+ break;
+ }
+ }
+ if (!UsedOutsideLoop) {
+ I->replaceAllUsesWith(ExitValue);
+ I->eraseFromParent();
+ }
+ }
+ else {
I->replaceAllUsesWith(UndefValue::get(I->getType()));
- I->eraseFromParent();
+ I->eraseFromParent();
+ }
}
LPM->deleteLoopFromQueue(L);
@@ -768,7 +785,7 @@
//
if (ExitCondition->getPredicate() == ICmpInst::ICMP_SLT
|| ExitCondition->getPredicate() == ICmpInst::ICMP_ULT) {
- Value *A = BinaryOperator::createAdd(NV, ConstantInt::get(Ty, 1, Sign),
+ Value *A = BinaryOperator::CreateAdd(NV, ConstantInt::get(Ty, 1, Sign),
"lsplit.add", PHTerminator);
Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
A, UB, "lsplit.c", PHTerminator);
@@ -820,7 +837,7 @@
//
else if (ExitCondition->getPredicate() == ICmpInst::ICMP_SLE
|| ExitCondition->getPredicate() == ICmpInst::ICMP_ULE) {
- Value *S = BinaryOperator::createSub(NV, ConstantInt::get(Ty, 1, Sign),
+ Value *S = BinaryOperator::CreateSub(NV, ConstantInt::get(Ty, 1, Sign),
"lsplit.add", PHTerminator);
Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
S, UB, "lsplit.c", PHTerminator);
@@ -856,7 +873,7 @@
// LOOP_BODY
//
{
- Value *A = BinaryOperator::createAdd(NV, ConstantInt::get(Ty, 1, Sign),
+ Value *A = BinaryOperator::CreateAdd(NV, ConstantInt::get(Ty, 1, Sign),
"lsplit.add", PHTerminator);
Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
A, StartValue, "lsplit.c", PHTerminator);
@@ -1124,6 +1141,11 @@
BasicBlock *Succ0 = SplitTerminator->getSuccessor(0);
BasicBlock *Succ1 = SplitTerminator->getSuccessor(1);
+ // If split block does not dominate the latch then this is not a diamond.
+ // Such loop may not benefit from index split.
+ if (!DT->dominates(SplitCondBlock, Latch))
+ return false;
+
// Finally this split condition is safe only if merge point for
// split condition branch is loop latch. This check along with previous
// check, to ensure that exit condition is in either loop latch or header,
@@ -1207,7 +1229,7 @@
// A;
// for (i = max(LB, BSV); i < UB; ++i)
// B;
- BSV = BinaryOperator::createAdd(SD.SplitValue,
+ BSV = BinaryOperator::CreateAdd(SD.SplitValue,
ConstantInt::get(Ty, 1, Sign),
"lsplit.add", PHTerminator);
AEV = BSV;
@@ -1238,7 +1260,7 @@
// B;
// for (i = max(LB, BSV); i < UB; ++i)
// A;
- BSV = BinaryOperator::createAdd(SD.SplitValue,
+ BSV = BinaryOperator::CreateAdd(SD.SplitValue,
ConstantInt::get(Ty, 1, Sign),
"lsplit.add", PHTerminator);
AEV = BSV;
@@ -1266,7 +1288,7 @@
// A;
// for (i = max(LB, BSV); i <= UB; ++i)
// B;
- AEV = BinaryOperator::createSub(SD.SplitValue,
+ AEV = BinaryOperator::CreateSub(SD.SplitValue,
ConstantInt::get(Ty, 1, Sign),
"lsplit.sub", PHTerminator);
break;
@@ -1282,7 +1304,7 @@
// A;
// for (i = max(LB, BSV); i <= UB; ++i)
// B;
- BSV = BinaryOperator::createAdd(SD.SplitValue,
+ BSV = BinaryOperator::CreateAdd(SD.SplitValue,
ConstantInt::get(Ty, 1, Sign),
"lsplit.add", PHTerminator);
break;
@@ -1298,7 +1320,7 @@
// B;
// for (i = max(LB, BSV); i <= UB; ++i)
// A;
- BSV = BinaryOperator::createAdd(SD.SplitValue,
+ BSV = BinaryOperator::CreateAdd(SD.SplitValue,
ConstantInt::get(Ty, 1, Sign),
"lsplit.add", PHTerminator);
break;
@@ -1315,7 +1337,7 @@
// B;
// for (i = max(LB, BSV); i <= UB; ++i)
// A;
- AEV = BinaryOperator::createSub(SD.SplitValue,
+ AEV = BinaryOperator::CreateSub(SD.SplitValue,
ConstantInt::get(Ty, 1, Sign),
"lsplit.sub", PHTerminator);
break;
@@ -1377,7 +1399,7 @@
BasicBlock *SplitCondBlock = SD.SplitCondition->getParent();
- // Unable to handle triange loops at the moment.
+ // Unable to handle triangle loops at the moment.
// In a triangle loop, the split condition is in the header and one of
// the split destinations is the loop latch. If the split condition is EQ
// then such loops are already handled in processOneIterationLoop().
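For context on the transformation these hunks touch: loop index splitting takes a loop whose body branches on a comparison against a loop-invariant split value and emits two consecutive branch-free loops. A minimal source-level sketch of the idea, with hypothetical helpers A and B; the pass itself operates on LLVM IR, not C++:

  void A(int); void B(int);  // hypothetical loop bodies

  // Before: one loop, split condition tested every iteration.
  void before(int LB, int UB, int SV) {
    for (int i = LB; i < UB; ++i) {
      if (i < SV)
        A(i);
      else
        B(i);
    }
  }

  // After index splitting: two loops, each free of the branch. Mid
  // clamps the split value into [LB, UB] so exactly the same
  // iterations run A and B as before.
  void after(int LB, int UB, int SV) {
    int Mid = SV < LB ? LB : (SV > UB ? UB : SV);
    for (int i = LB; i < Mid; ++i)
      A(i);
    for (int i = Mid; i < UB; ++i)
      B(i);
  }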
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopRotation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopRotation.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopRotation.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopRotation.cpp Sun Jul 6 15:45:41 2008
@@ -102,10 +102,10 @@
LPPassManager *LPM_Ptr;
SmallVector<RenameData, MAX_HEADER_SIZE> LoopHeaderInfo;
};
-
- char LoopRotate::ID = 0;
- RegisterPass<LoopRotate> X ("loop-rotate", "Rotate Loops");
}
+
+char LoopRotate::ID = 0;
+static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
LoopPass *llvm::createLoopRotatePass() { return new LoopRotate(); }
@@ -164,7 +164,7 @@
// Check size of original header and reject
// loop if it is very big.
- if (OrigHeader->getInstList().size() > MAX_HEADER_SIZE)
+ if (OrigHeader->size() > MAX_HEADER_SIZE)
return false;
// Now, this loop is suitable for rotation.
@@ -208,10 +208,10 @@
// Create new PHI node with two incoming values for NewHeader.
// One incoming value is from OrigLatch (through OrigHeader) and
// second incoming value is from original pre-header.
- PHINode *NH = PHINode::Create(In->getType(), In->getName());
+ PHINode *NH = PHINode::Create(In->getType(), In->getName(),
+ NewHeader->begin());
NH->addIncoming(PN->getIncomingValueForBlock(OrigLatch), OrigHeader);
NH->addIncoming(NPV, OrigPreHeader);
- NewHeader->getInstList().push_front(NH);
// "In" can be replaced by NH at various places.
LoopHeaderInfo.push_back(RenameData(In, NPV, NH));
@@ -249,14 +249,36 @@
// create new PHINode for this instruction.
Instruction *NewHeaderReplacement = NULL;
if (usedOutsideOriginalHeader(In)) {
- PHINode *PN = PHINode::Create(In->getType(), In->getName());
- PN->addIncoming(In, OrigHeader);
- PN->addIncoming(C, OrigPreHeader);
- NewHeader->getInstList().push_front(PN);
- NewHeaderReplacement = PN;
- }
-
- // "In" can be replaced by NPH or NH at various places.
+ // FIXME: remove this when we have first-class aggregates.
+ if (isa<StructType>(In->getType())) {
+ // Can't create PHI nodes for this type. If there are any getResults
+ // not defined in this block, move them back to this block. PHI
+ // nodes will be created for all getResults later.
+ BasicBlock::iterator InsertPoint;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(In)) {
+ InsertPoint = II->getNormalDest()->getFirstNonPHI();
+ } else {
+ InsertPoint = I; // call
+ ++InsertPoint;
+ }
+ for (Value::use_iterator UI = In->use_begin(), UE = In->use_end();
+ UI != UE; ++UI) {
+ GetResultInst *InGR = cast<GetResultInst>(UI);
+ if (InGR->getParent() != OrigHeader) {
+ // Move InGR to immediately after the call or in the normal dest of
+ // the invoke. It will be picked up, cloned and PHI'd on the next
+ // iteration.
+ InGR->moveBefore(InsertPoint);
+ }
+ }
+ } else {
+ PHINode *PN = PHINode::Create(In->getType(), In->getName(),
+ NewHeader->begin());
+ PN->addIncoming(In, OrigHeader);
+ PN->addIncoming(C, OrigPreHeader);
+ NewHeaderReplacement = PN;
+ }
+ }
LoopHeaderInfo.push_back(RenameData(In, C, NewHeaderReplacement));
}
@@ -336,10 +358,10 @@
} else {
// Used outside Exit block. Create a new PHI node in the exit block
// to receive the value from the new header and pre-header.
- PHINode *PN = PHINode::Create(U->getType(), U->getName());
+ PHINode *PN = PHINode::Create(U->getType(), U->getName(),
+ Exit->begin());
PN->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
PN->addIncoming(OldPhi, OrigHeader);
- Exit->getInstList().push_front(PN);
U->replaceUsesOfWith(OldPhi, PN);
}
}
@@ -447,7 +469,8 @@
// Right now original pre-header has two successors, new header and
// exit block. Insert new block between original pre-header and
// new header such that loop's new pre-header has only one successor.
- BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph", OrigHeader->getParent(),
+ BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph",
+ OrigHeader->getParent(),
NewHeader);
LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
if (Loop *PL = LI.getLoopFor(OrigPreHeader))
@@ -560,14 +583,15 @@
BasicBlock::iterator I = Exit->begin(), E = Exit->end();
PHINode *PN = NULL;
for (; (PN = dyn_cast<PHINode>(I)); ++I) {
- PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName());
unsigned N = PN->getNumIncomingValues();
for (unsigned index = 0; index < N; ++index)
if (PN->getIncomingBlock(index) == NExit) {
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(),
+ NExit->begin());
NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch());
PN->setIncomingValue(index, NewPN);
PN->setIncomingBlock(index, NExit);
- NExit->getInstList().push_front(NewPN);
+ break;
}
}
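A reminder of what LoopRotation does, since the PHI surgery above is dense: rotation turns a test-at-top loop into a bottom-tested loop behind one guarded copy of the exit test, which is exactly what creates the new pre-header and the PHI rewiring handled in this file. A rough source-level sketch, with assumed helpers cond and body:

  int cond(int i, int n);  // hypothetical exit test
  void body(int i);        // hypothetical loop body

  // Before rotation: the header holds the exit test.
  void before(int n) {
    for (int i = 0; cond(i, n); ++i)
      body(i);
  }

  // After rotation: one guarded copy of the test, then a bottom-tested
  // loop. The guard is the copy cloned into the pre-header; the PHI
  // nodes built above stitch the two copies' values back together.
  void after(int n) {
    int i = 0;
    if (cond(i, n)) {
      do {
        body(i);
        ++i;
      } while (cond(i, n));
    }
  }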
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopStrengthReduce.cpp Sun Jul 6 15:45:41 2008
@@ -31,6 +31,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
@@ -136,7 +137,7 @@
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
- SmallPtrSet<Instruction*,16> DeadInsts;
+ SetVector<Instruction*> DeadInsts;
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
@@ -192,12 +193,14 @@
void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L, bool isOnlyStride);
- void DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*,16> &Insts);
+ void DeleteTriviallyDeadInstructions(SetVector<Instruction*> &Insts);
};
- char LoopStrengthReduce::ID = 0;
- RegisterPass<LoopStrengthReduce> X("loop-reduce", "Loop Strength Reduction");
}
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
+
LoopPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
@@ -224,10 +227,10 @@
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void LoopStrengthReduce::
-DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*,16> &Insts) {
+DeleteTriviallyDeadInstructions(SetVector<Instruction*> &Insts) {
while (!Insts.empty()) {
- Instruction *I = *Insts.begin();
- Insts.erase(I);
+ Instruction *I = Insts.back();
+ Insts.pop_back();
if (PHINode *PN = dyn_cast<PHINode>(I)) {
// If all incoming values to the Phi are the same, we can replace the Phi
@@ -235,8 +238,8 @@
if (Value *PNV = PN->hasConstantValue()) {
if (Instruction *U = dyn_cast<Instruction>(PNV))
Insts.insert(U);
- PN->replaceAllUsesWith(PNV);
SE->deleteValueFromRecords(PN);
+ PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
Changed = true;
continue;
@@ -244,8 +247,8 @@
}
if (isInstructionTriviallyDead(I)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (Instruction *U = dyn_cast<Instruction>(*i))
Insts.insert(U);
SE->deleteValueFromRecords(I);
I->eraseFromParent();
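The container swap above is not cosmetic: iterating a pointer set visits elements in address order, which can differ from run to run, while a set that remembers insertion order makes the deletion order, and hence the pass output, deterministic. A rough sketch of the idea, matching only the insert/back/pop_back surface used here and making no claim about LLVM's actual SetVector internals:

  #include <set>
  #include <vector>

  // Uniquing container with deterministic (insertion) order.
  template <typename T> class OrderedSet {
    std::vector<T> Order;  // defines iteration and pop order
    std::set<T> Seen;      // fast membership check for uniquing
  public:
    bool insert(const T &V) {
      if (!Seen.insert(V).second)
        return false;      // already present; keep first position
      Order.push_back(V);
      return true;
    }
    bool empty() const { return Order.empty(); }
    const T &back() const { return Order.back(); }
    void pop_back() {
      Seen.erase(Order.back());
      Order.pop_back();
    }
  };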
@@ -287,24 +290,25 @@
gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+ i != e; ++i, ++GTI) {
// If this is a use of a recurrence that we can analyze, and it comes before
// Op does in the GEP operand list, we will handle this when we process this
// operand.
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx = cast<ConstantInt>(GEP->getOperand(i))->getZExtValue();
+ unsigned Idx = cast<ConstantInt>(*i)->getZExtValue();
uint64_t Offset = SL->getElementOffset(Idx);
GEPVal = SE->getAddExpr(GEPVal,
SE->getIntegerSCEV(Offset, UIntPtrTy));
} else {
unsigned GEPOpiBits =
- GEP->getOperand(i)->getType()->getPrimitiveSizeInBits();
+ (*i)->getType()->getPrimitiveSizeInBits();
unsigned IntPtrBits = UIntPtrTy->getPrimitiveSizeInBits();
Instruction::CastOps opcode = (GEPOpiBits < IntPtrBits ?
Instruction::SExt : (GEPOpiBits > IntPtrBits ? Instruction::Trunc :
Instruction::BitCast));
- Value *OpVal = getCastedVersionOf(opcode, GEP->getOperand(i));
+ Value *OpVal = getCastedVersionOf(opcode, *i);
SCEVHandle Idx = SE->getSCEV(OpVal);
uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
@@ -375,7 +379,7 @@
/// should use the post-inc value).
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
Loop *L, DominatorTree *DT, Pass *P,
- SmallPtrSet<Instruction*,16> &DeadInsts){
+ SetVector<Instruction*> &DeadInsts){
// If the user is in the loop, use the preinc value.
if (L->contains(User->getParent())) return false;
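For intuition on the pre-inc/post-inc distinction this function draws: a use inside the loop wants the induction variable's value before that iteration's increment, while a use after the loop sees the incremented value. In source terms, a hand-made example rather than code from the pass:

  int sum(const int *a, int n) {
    int i = 0, s = 0;
    for (; i < n; ++i)
      s += a[i];  // in-loop use: the pre-increment value of i
    // Out-of-loop use: i has already been incremented past the last
    // iteration (i == n here), so it should be rewritten against the
    // post-incremented IV.
    return s + i;
  }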
@@ -541,8 +545,9 @@
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
// to it.
void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
+ Instruction *InsertPt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallPtrSet<Instruction*,16> &DeadInsts);
+ SetVector<Instruction*> &DeadInsts);
Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
SCEVExpander &Rewriter,
@@ -581,9 +586,8 @@
}
// If there is no immediate value, skip the next part.
- if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
- if (SC->getValue()->isZero())
- return Rewriter.expandCodeFor(NewBase, BaseInsertPt);
+ if (Imm->isZero())
+ return Rewriter.expandCodeFor(NewBase, BaseInsertPt);
Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt);
@@ -601,10 +605,14 @@
// Once we rewrite the code to insert the new IVs we want, update the
// operands of Inst to use the new expression 'NewBase', with 'Imm' added
-// to it.
+// to it. NewBasePt is the last instruction which contributes to the
+// value of NewBase in the case that it's a different instruction from
+// the PHI that NewBase is computed from, or null otherwise.
+//
void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
+ Instruction *NewBasePt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallPtrSet<Instruction*,16> &DeadInsts) {
+ SetVector<Instruction*> &DeadInsts) {
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
@@ -618,7 +626,11 @@
// value will be pinned to live somewhere after the original computation.
// In this case, we have to back off.
if (!isUseOfPostIncrementedValue) {
- if (Instruction *OpInst = dyn_cast<Instruction>(OperandValToReplace)) {
+ if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
+ InsertPt = NewBasePt;
+ ++InsertPt;
+ } else if (Instruction *OpInst
+ = dyn_cast<Instruction>(OperandValToReplace)) {
InsertPt = OpInst;
while (isa<PHINode>(InsertPt)) ++InsertPt;
}
@@ -878,8 +890,7 @@
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
- } else if (!isa<SCEVConstant>(Expr) ||
- !cast<SCEVConstant>(Expr)->getValue()->isZero()) {
+ } else if (!Expr->isZero()) {
// Do not add zero.
SubExprs.push_back(Expr);
}
@@ -966,14 +977,6 @@
return Result;
}
-/// isZero - returns true if the scalar evolution expression is zero.
-///
-static bool isZero(const SCEVHandle &V) {
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V))
- return SC->getValue()->isZero();
- return false;
-}
-
/// ValidStride - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
@@ -996,7 +999,7 @@
TargetLowering::AddrMode AM;
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
- AM.HasBaseReg = HasBaseReg || !isZero(UsersToProcess[i].Base);
+ AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
AM.Scale = Scale;
// If load[imm+r*scale] is illegal, bail out.
@@ -1056,7 +1059,7 @@
IE = SI->second.IVs.end(); II != IE; ++II)
// FIXME: Only handle base == 0 for now.
// Only reuse previous IV if it would not require a type conversion.
- if (isZero(II->Base) &&
+ if (II->Base->isZero() &&
!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Scale;
@@ -1109,19 +1112,14 @@
if (II->getOperand(1) == OperandVal)
isAddress = true;
break;
- case Intrinsic::x86_sse2_loadh_pd:
- case Intrinsic::x86_sse2_loadl_pd:
- if (II->getOperand(2) == OperandVal)
- isAddress = true;
- break;
}
}
return isAddress;
}
// CollectIVUsers - Transform our list of users and offsets to a bit more
-// complex table. In this new vector, each 'BasedUser' contains 'Base' the base
-// of the strided accessas well as the old information from Uses. We
+// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
+// of the strided accesses, as well as the old information from Uses. We
// progressively move information from the Base field to the Imm field, until
// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
@@ -1225,7 +1223,7 @@
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
- bool HaveCommonExprs = !isZero(CommonExprs);
+ bool HaveCommonExprs = !CommonExprs->isZero();
// If all uses are addresses, check if it is possible to reuse an IV with a
// stride that is a factor of this stride. And that the multiple is a number
@@ -1398,6 +1396,16 @@
SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
+ // If we had to insert new instructions for RewriteOp, we have to
+ // consider that they may not have been able to end up immediately
+ // next to RewriteOp, because non-PHI instructions may never precede
+ // PHI instructions in a block. In this case, remember where the last
+ // instruction was inserted so that if we're replacing a different
+ // PHI node, we can use the later point to expand the final
+ // RewriteExpr.
+ Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
+ if (RewriteOp == NewPHI) NewBasePt = 0;
+
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
@@ -1424,7 +1432,8 @@
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
- User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this,
+ User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
+ Rewriter, L, this,
DeadInsts);
// Mark old value we replaced as possibly dead, so that it is eliminated
@@ -1577,8 +1586,8 @@
? UIntPtrTy->getPrimitiveSizeInBits()
: NewCmpTy->getPrimitiveSizeInBits();
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
- // Check if it is possible to rewrite it using a iv / stride of a smaller
- // integer type.
+ // Check if it is possible to rewrite it using
+ // an iv / stride of a smaller integer type.
bool TruncOk = false;
if (NewCmpTy->isInteger()) {
unsigned Bits = NewTyBits;
@@ -1610,7 +1619,7 @@
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
if (AllUsesAreAddresses &&
- ValidStride(!isZero(CommonExprs), Scale, UsersToProcess)) {
+ ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess)) {
NewCmpVal = CmpVal;
continue;
}
@@ -1625,6 +1634,18 @@
}
}
+ // Forgo this transformation if the increment happens to be
+ // unfortunately positioned after the condition, and the condition
+ // has multiple uses which prevent it from being moved immediately
+ // before the branch. See
+ // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
+ // for an example of this situation.
+ if (!Cond->hasOneUse())
+ for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
+ I != E; ++I)
+ if (I == NewIncV)
+ return Cond;
+
if (NewCmpVal != CmpVal) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
@@ -1636,14 +1657,14 @@
RHS = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, RHS, NewCmpTy);
}
// Insert new compare instruction.
- Cond = new ICmpInst(Predicate, NewIncV, RHS);
- Cond->setName(L->getHeader()->getName() + ".termcond");
- OldCond->getParent()->getInstList().insert(OldCond, Cond);
+ Cond = new ICmpInst(Predicate, NewIncV, RHS,
+ L->getHeader()->getName() + ".termcond",
+ OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.insert(cast<Instruction>(CondUse->OperandValToReplace));
- OldCond->replaceAllUsesWith(Cond);
SE->deleteValueFromRecords(OldCond);
+ OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IVUsesByStride[*CondStride].Users.pop_back();
@@ -1761,7 +1782,7 @@
#endif
// IVsByStride keeps IVs for one particular loop.
- IVsByStride.clear();
+ assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
@@ -1778,36 +1799,39 @@
StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
}
+ // We're done analyzing this loop; release all the state we built up for it.
+ CastedPointers.clear();
+ IVUsesByStride.clear();
+ IVsByStride.clear();
+ StrideOrder.clear();
+
// Clean up after ourselves
if (!DeadInsts.empty()) {
DeleteTriviallyDeadInstructions(DeadInsts);
BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *PN;
- while ((PN = dyn_cast<PHINode>(I))) {
- ++I; // Preincrement iterator to avoid invalidating it when deleting PN.
-
- // At this point, we know that we have killed one or more GEP
- // instructions. It is worth checking to see if the cann indvar is also
- // dead, so that we can remove it as well. The requirements for the cann
- // indvar to be considered dead are:
- // 1. the cann indvar has one use
- // 2. the use is an add instruction
- // 3. the add has one use
- // 4. the add is used by the cann indvar
- // If all four cases above are true, then we can remove both the add and
- // the cann indvar.
+ while (PHINode *PN = dyn_cast<PHINode>(I++)) {
+ // At this point, we know that we have killed one or more IV users.
+ // It is worth checking to see if the canonical indvar is also
+ // dead, so that we can remove it as well.
+ //
+ // We can remove a PHI if it is on a cycle in the def-use graph
+ // where each node in the cycle has degree one, i.e. only one use,
+ // and is an instruction with no side effects.
+ //
// FIXME: this needs to eliminate an induction variable even if it's being
// compared against some value to decide loop termination.
if (PN->hasOneUse()) {
- Instruction *BO = dyn_cast<Instruction>(*PN->use_begin());
- if (BO && (isa<BinaryOperator>(BO) || isa<CmpInst>(BO))) {
- if (BO->hasOneUse() && PN == *(BO->use_begin())) {
- DeadInsts.insert(BO);
- // Break the cycle, then delete the PHI.
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ for (Instruction *J = dyn_cast<Instruction>(*PN->use_begin());
+ J && J->hasOneUse() && !J->mayWriteToMemory();
+ J = dyn_cast<Instruction>(*J->use_begin())) {
+ // If we find the original PHI, we've discovered a cycle.
+ if (J == PN) {
+ // Break the cycle and mark the PHI for deletion.
SE->deleteValueFromRecords(PN);
- PN->eraseFromParent();
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ DeadInsts.insert(PN);
+ break;
}
}
}
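The walk above follows the chain of single users starting at the PHI; if it arrives back at the PHI, every instruction on the chain is part of a dead cycle and can be deleted. A toy model of the same walk, with a made-up Node type standing in for instructions:

  // Toy stand-in for an instruction with at most one user.
  struct Node {
    Node *OnlyUser;       // null if zero or multiple users
    bool HasSideEffects;
  };

  // Follow single-user links; returning to Start means the whole chain
  // is a dead cycle. Like the PHI walk above, this assumes any cycle
  // reachable this way passes through Start.
  bool onDeadCycle(Node *Start) {
    for (Node *J = Start->OnlyUser; J && !J->HasSideEffects;
         J = J->OnlyUser)
      if (J == Start)
        return true;
    return false;
  }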
@@ -1815,8 +1839,5 @@
DeleteTriviallyDeadInstructions(DeadInsts);
}
- CastedPointers.clear();
- IVUsesByStride.clear();
- StrideOrder.clear();
return false;
}
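Stepping back from the mechanics of this file: the net effect of loop strength reduction, seen at the source level, is to replace per-iteration multiply-based address arithmetic with a running increment. An illustrative before/after, not the IR the pass actually produces:

  void zero_strided(int *a, int n, int s) {
    for (int i = 0; i < n; ++i)
      a[i * s] = 0;  // multiply recomputed every iteration
  }

  void zero_reduced(int *a, int n, int s) {
    int *p = a;      // strength-reduced induction variable
    for (int i = 0; i < n; ++i, p += s)
      *p = 0;        // the multiply became a pointer add
  }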
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnroll.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnroll.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnroll.cpp Sun Jul 6 15:45:41 2008
@@ -10,53 +10,31 @@
// This pass implements a simple loop unroller. It works best when loops have
// been canonicalized by the -indvars pass, allowing it to determine the trip
// counts of loops easily.
-//
-// This pass will multi-block loops only if they contain no non-unrolled
-// subloops. The process of unrolling can produce extraneous basic blocks
-// linked with unconditional branches. This will be corrected in the future.
-//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-unroll"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IntrinsicInst.h"
-#include <algorithm>
+#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <climits>
-#include <cstdio>
+
using namespace llvm;
-STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
-STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+static cl::opt<unsigned>
+UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+ cl::desc("The cut-off point for automatic loop unrolling"));
+
+static cl::opt<unsigned>
+UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+ cl::desc("Use this unroll count for all loops, for testing purposes"));
namespace {
- cl::opt<unsigned>
- UnrollThreshold
- ("unroll-threshold", cl::init(100), cl::Hidden,
- cl::desc("The cut-off point for automatic loop unrolling"));
-
- cl::opt<unsigned>
- UnrollCount
- ("unroll-count", cl::init(0), cl::Hidden,
- cl::desc("Use this unroll count for all loops, for testing purposes"));
-
class VISIBILITY_HIDDEN LoopUnroll : public LoopPass {
- LoopInfo *LI; // The current loop information
public:
static char ID; // Pass ID, replacement for typeid
LoopUnroll() : LoopPass((intptr_t)&ID) {}
@@ -67,8 +45,6 @@
static const unsigned NoThreshold = UINT_MAX;
bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool unrollLoop(Loop *L, unsigned Count, unsigned Threshold);
- BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB);
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
@@ -79,19 +55,27 @@
AU.addRequired<LoopInfo>();
AU.addPreservedID(LCSSAID);
AU.addPreserved<LoopInfo>();
+ // FIXME: Loop unroll requires LCSSA, and LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then the LCSSA pass on
+ // the next loop will receive invalid dom info.
+ // For now, recreate dom info if the loop is unrolled.
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
}
};
- char LoopUnroll::ID = 0;
- RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
}
+char LoopUnroll::ID = 0;
+static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
+
LoopPass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L) {
unsigned Size = 0;
- for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
- BasicBlock *BB = L->getBlocks()[i];
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
Instruction *Term = BB->getTerminator();
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (isa<PHINode>(I) && BB == L->getHeader()) {
@@ -122,136 +106,18 @@
return Size;
}
-// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by ValueMap.
-//
-static inline void RemapInstruction(Instruction *I,
- DenseMap<const Value *, Value*> &ValueMap) {
- for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
- Value *Op = I->getOperand(op);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
- if (It != ValueMap.end()) Op = It->second;
- I->setOperand(op, Op);
- }
-}
-
-// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
-// only has one predecessor, and that predecessor only has one successor.
-// Returns the new combined block.
-BasicBlock *LoopUnroll::FoldBlockIntoPredecessor(BasicBlock *BB) {
- // Merge basic blocks into their predecessor if there is only one distinct
- // pred, and if there is only one distinct successor of the predecessor, and
- // if there are no PHI nodes.
- //
- BasicBlock *OnlyPred = BB->getSinglePredecessor();
- if (!OnlyPred) return 0;
-
- if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
- return 0;
-
- DOUT << "Merging: " << *BB << "into: " << *OnlyPred;
-
- // Resolve any PHI nodes at the start of the block. They are all
- // guaranteed to have exactly one entry if they exist, unless there are
- // multiple duplicate (but guaranteed to be equal) entries for the
- // incoming edges. This occurs when there are multiple edges from
- // OnlyPred to OnlySucc.
- //
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- BB->getInstList().pop_front(); // Delete the phi node...
- }
-
- // Delete the unconditional branch from the predecessor...
- OnlyPred->getInstList().pop_back();
-
- // Move all definitions in the successor to the predecessor...
- OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
- // Make all PHI nodes that referred to BB now refer to Pred as their
- // source...
- BB->replaceAllUsesWith(OnlyPred);
-
- std::string OldName = BB->getName();
-
- // Erase basic block from the function...
- LI->removeBlock(BB);
- BB->eraseFromParent();
-
- // Inherit predecessor's name if it exists...
- if (!OldName.empty() && !OnlyPred->hasName())
- OnlyPred->setName(OldName);
-
- return OnlyPred;
-}
-
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
- LI = &getAnalysis<LoopInfo>();
-
- // Unroll the loop.
- if (!unrollLoop(L, UnrollCount, UnrollThreshold))
- return false;
-
- // Update the loop information for this loop.
- // If we completely unrolled the loop, remove it from the parent.
- if (L->getNumBackEdges() == 0)
- LPM.deleteLoopFromQueue(L);
-
- return true;
-}
-
-/// Unroll the given loop by UnrollCount, or by a heuristically-determined
-/// value if Count is zero. If Threshold is not NoThreshold, it is a value
-/// to limit code size expansion. If the loop size would expand beyond the
-/// threshold value, unrolling is suppressed. The return value is true if
-/// any transformations are performed.
-///
-bool LoopUnroll::unrollLoop(Loop *L, unsigned Count, unsigned Threshold) {
assert(L->isLCSSAForm());
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
BasicBlock *Header = L->getHeader();
- BasicBlock *LatchBlock = L->getLoopLatch();
- BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
DOUT << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n";
- if (!BI || BI->isUnconditional()) {
- // The loop-rotate pass can be helpful to avoid this in many cases.
- DOUT << " Can't unroll; loop not terminated by a conditional branch.\n";
- return false;
- }
-
- // Determine the trip count and/or trip multiple. A TripCount value of zero
- // is used to mean an unknown trip count. The TripMultiple value is the
- // greatest known integer multiple of the trip count.
- unsigned TripCount = 0;
- unsigned TripMultiple = 1;
- if (Value *TripCountValue = L->getTripCount()) {
- if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCountValue)) {
- // Guard against huge trip counts. This also guards against assertions in
- // APInt from the use of getZExtValue, below.
- if (TripCountC->getValue().getActiveBits() <= 32) {
- TripCount = (unsigned)TripCountC->getZExtValue();
- }
- } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCountValue)) {
- switch (BO->getOpcode()) {
- case BinaryOperator::Mul:
- if (ConstantInt *MultipleC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- if (MultipleC->getValue().getActiveBits() <= 32) {
- TripMultiple = (unsigned)MultipleC->getZExtValue();
- }
- }
- break;
- default: break;
- }
- }
- }
- if (TripCount != 0)
- DOUT << " Trip Count = " << TripCount << "\n";
- if (TripMultiple != 1)
- DOUT << " Trip Multiple = " << TripMultiple << "\n";
-
+ // Find trip count
+ unsigned TripCount = L->getSmallConstantTripCount();
+ unsigned Count = UnrollCount;
+
// Automatically select an unroll count.
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
@@ -264,249 +130,30 @@
}
}
- // Effectively "DCE" unrolled iterations that are beyond the tripcount
- // and will never be executed.
- if (TripCount != 0 && Count > TripCount)
- Count = TripCount;
-
- assert(Count > 0);
- assert(TripMultiple > 0);
- assert(TripCount == 0 || TripCount % TripMultiple == 0);
-
// Enforce the threshold.
- if (Threshold != NoThreshold) {
+ if (UnrollThreshold != NoThreshold) {
unsigned LoopSize = ApproximateLoopSize(L);
DOUT << " Loop Size = " << LoopSize << "\n";
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > Threshold) {
+ if (TripCount != 1 && Size > UnrollThreshold) {
DOUT << " TOO LARGE TO UNROLL: "
- << Size << ">" << Threshold << "\n";
+ << Size << ">" << UnrollThreshold << "\n";
return false;
}
}
- // Are we eliminating the loop control altogether?
- bool CompletelyUnroll = Count == TripCount;
-
- // If we know the trip count, we know the multiple...
- unsigned BreakoutTrip = 0;
- if (TripCount != 0) {
- BreakoutTrip = TripCount % Count;
- TripMultiple = 0;
- } else {
- // Figure out what multiple to use.
- BreakoutTrip = TripMultiple =
- (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
- }
-
- if (CompletelyUnroll) {
- DOUT << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << TripCount << "!\n";
- } else {
- DOUT << "UNROLLING loop %" << Header->getName()
- << " by " << Count;
- if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
- DOUT << " with a breakout at trip " << BreakoutTrip;
- } else if (TripMultiple != 1) {
- DOUT << " with " << TripMultiple << " trips per branch";
- }
- DOUT << "!\n";
- }
-
- std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
-
- bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
- BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
-
- // For the first iteration of the loop, we should use the precloned values for
- // PHI nodes. Insert associations now.
- typedef DenseMap<const Value*, Value*> ValueMapTy;
- ValueMapTy LastValueMap;
- std::vector<PHINode*> OrigPHINode;
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- OrigPHINode.push_back(PN);
- if (Instruction *I =
- dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
- if (L->contains(I->getParent()))
- LastValueMap[I] = I;
- }
-
- std::vector<BasicBlock*> Headers;
- std::vector<BasicBlock*> Latches;
- Headers.push_back(Header);
- Latches.push_back(LatchBlock);
-
- for (unsigned It = 1; It != Count; ++It) {
- char SuffixBuffer[100];
- sprintf(SuffixBuffer, ".%d", It);
-
- std::vector<BasicBlock*> NewBlocks;
-
- for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
- E = LoopBlocks.end(); BB != E; ++BB) {
- ValueMapTy ValueMap;
- BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
- Header->getParent()->getBasicBlockList().push_back(New);
-
- // Loop over all of the PHI nodes in the block, changing them to use the
- // incoming values from the previous block.
- if (*BB == Header)
- for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
- Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
- if (Instruction *InValI = dyn_cast<Instruction>(InVal))
- if (It > 1 && L->contains(InValI->getParent()))
- InVal = LastValueMap[InValI];
- ValueMap[OrigPHINode[i]] = InVal;
- New->getInstList().erase(NewPHI);
- }
-
- // Update our running map of newest clones
- LastValueMap[*BB] = New;
- for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
- VI != VE; ++VI)
- LastValueMap[VI->first] = VI->second;
-
- L->addBasicBlockToLoop(New, LI->getBase());
-
- // Add phi entries for newly created values to all exit blocks except
- // the successor of the latch block. The successor of the exit block will
- // be updated specially after unrolling all the way.
- if (*BB != LatchBlock)
- for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
- UI != UE;) {
- Instruction *UseInst = cast<Instruction>(*UI);
- ++UI;
- if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
- PHINode *phi = cast<PHINode>(UseInst);
- Value *Incoming = phi->getIncomingValueForBlock(*BB);
- phi->addIncoming(Incoming, New);
- }
- }
-
- // Keep track of new headers and latches as we create them, so that
- // we can insert the proper branches later.
- if (*BB == Header)
- Headers.push_back(New);
- if (*BB == LatchBlock) {
- Latches.push_back(New);
-
- // Also, clear out the new latch's back edge so that it doesn't look
- // like a new loop, so that it's amenable to being merged with adjacent
- // blocks later on.
- TerminatorInst *Term = New->getTerminator();
- assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
- assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
- Term->setSuccessor(!ContinueOnTrue, NULL);
- }
-
- NewBlocks.push_back(New);
- }
-
- // Remap all instructions in the most recent iteration
- for (unsigned i = 0; i < NewBlocks.size(); ++i) {
- BasicBlock *NB = NewBlocks[i];
- if (BasicBlock *UnwindDest = NB->getUnwindDest())
- NB->setUnwindDest(cast<BasicBlock>(LastValueMap[UnwindDest]));
-
- for (BasicBlock::iterator I = NB->begin(), E = NB->end(); I != E; ++I)
- RemapInstruction(I, LastValueMap);
- }
- }
-
- // The latch block exits the loop. If there are any PHI nodes in the
- // successor blocks, update them to use the appropriate values computed as the
- // last iteration of the loop.
- if (Count != 1) {
- SmallPtrSet<PHINode*, 8> Users;
- for (Value::use_iterator UI = LatchBlock->use_begin(),
- UE = LatchBlock->use_end(); UI != UE; ++UI)
- if (PHINode *phi = dyn_cast<PHINode>(*UI))
- Users.insert(phi);
-
- BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
- for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
- SI != SE; ++SI) {
- PHINode *PN = *SI;
- Value *InVal = PN->removeIncomingValue(LatchBlock, false);
- // If this value was defined in the loop, take the value defined by the
- // last iteration of the loop.
- if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
- if (L->contains(InValI->getParent()))
- InVal = LastValueMap[InVal];
- }
- PN->addIncoming(InVal, LastIterationBB);
- }
- }
-
- // Now, if we're doing complete unrolling, loop over the PHI nodes in the
- // original block, setting them to their incoming values.
- if (CompletelyUnroll) {
- BasicBlock *Preheader = L->getLoopPreheader();
- for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *PN = OrigPHINode[i];
- PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
- Header->getInstList().erase(PN);
- }
- }
-
- // Now that all the basic blocks for the unrolled iterations are in place,
- // set up the branches to connect them.
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The original branch was replicated in each unrolled iteration.
- BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
-
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = true;
-
- // For a complete unroll, make the last iteration end with a branch
- // to the exit block.
- if (CompletelyUnroll && j == 0) {
- Dest = LoopExit;
- NeedConditional = false;
- }
-
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
- NeedConditional = false;
- }
+ // Unroll the loop.
+ Function *F = L->getHeader()->getParent();
+ if (!UnrollLoop(L, Count, LI, &LPM))
+ return false;
- if (NeedConditional) {
- // Update the conditional branch's successor for the following
- // iteration.
- Term->setSuccessor(!ContinueOnTrue, Dest);
- } else {
- Term->setUnconditionalDest(Dest);
- // Merge adjacent basic blocks, if possible.
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest)) {
- std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- std::replace(Headers.begin(), Headers.end(), Dest, Fold);
- }
- }
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ DominatorTree *DT = getAnalysisToUpdate<DominatorTree>();
+ if (DT) {
+ DT->runOnFunction(*F);
+ DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>();
+ if (DF)
+ DF->runOnFunction(*F);
}
-
- // At this point, the code is well formed. We now do a quick sweep over the
- // inserted code, doing constant propagation and dead code elimination as we
- // go.
- const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
- for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
- BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
- for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
- Instruction *Inst = I++;
-
- if (isInstructionTriviallyDead(Inst))
- (*BB)->getInstList().erase(Inst);
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
- Inst->replaceAllUsesWith(C);
- (*BB)->getInstList().erase(Inst);
- }
- }
-
- NumCompletelyUnrolled += CompletelyUnroll;
- ++NumUnrolled;
return true;
}
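The transformation now delegated to UnrollLoop, for reference: unrolling by a count replicates the body and keeps conditional branches only where an exit can actually be taken. The classic remainder-loop formulation below conveys the intuition when the trip count is not a known multiple of the count; the unroller itself uses breakout branches at cloned latches rather than a literal second loop:

  void body(int i);  // hypothetical loop body

  void unrolled_by_4(int n) {
    int i = 0;
    // Main unrolled loop: four copies of the body per latch test.
    for (; i + 4 <= n; i += 4) {
      body(i);
      body(i + 1);
      body(i + 2);
      body(i + 3);
    }
    // Remainder iterations when n is not a multiple of 4.
    for (; i < n; ++i)
      body(i);
  }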
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/LoopUnswitch.cpp Sun Jul 6 15:45:41 2008
@@ -54,11 +54,11 @@
STATISTIC(NumTrivial , "Number of unswitches that are trivial");
STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
-namespace {
- cl::opt<unsigned>
- Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
- cl::init(10), cl::Hidden);
+static cl::opt<unsigned>
+Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
+ cl::init(10), cl::Hidden);
+namespace {
class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass {
LoopInfo *LI; // Loop information
LPPassManager *LPM;
@@ -71,24 +71,28 @@
bool OptimizeForSize;
bool redoLoop;
+ Loop *currentLoop;
DominanceFrontier *DF;
DominatorTree *DT;
+ BasicBlock *loopHeader;
+ BasicBlock *loopPreheader;
- /// LoopDF - Loop's dominance frontier. This set is a collection of
- /// loop exiting blocks' DF member blocks. However this does set does not
- /// includes basic blocks that are inside loop.
- SmallPtrSet<BasicBlock *, 8> LoopDF;
-
- /// OrigLoopExitMap - This is used to map loop exiting block with
- /// corresponding loop exit block, before updating CFG.
- DenseMap<BasicBlock *, BasicBlock *> OrigLoopExitMap;
+ // LoopBlocks contains all of the basic blocks of the loop, including the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
+ // loop, in that order.
+ std::vector<BasicBlock*> LoopBlocks;
+ // NewBlocks contains cloned copies of the basic blocks from LoopBlocks.
+ std::vector<BasicBlock*> NewBlocks;
+
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
- LoopPass((intptr_t)&ID), OptimizeForSize(Os), redoLoop(false) {}
+ LoopPass((intptr_t)&ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
+ loopPreheader(NULL) {}
bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool processLoop(Loop *L);
+ bool processCurrentLoop();
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
@@ -115,18 +119,17 @@
LoopProcessWorklist.erase(I);
}
+ void initLoopData() {
+ loopHeader = currentLoop->getHeader();
+ loopPreheader = currentLoop->getLoopPreheader();
+ }
+
/// Split all of the edges from inside the loop to their exit blocks.
/// Update the appropriate Phi nodes as we do so.
- void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks,
- SmallVector<BasicBlock *, 8> &MiddleBlocks);
+ void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
- /// If BB's dominance frontier has a member that is not part of loop L then
- /// remove it. Add NewDFMember in BB's dominance frontier.
- void ReplaceLoopExternalDFMember(Loop *L, BasicBlock *BB,
- BasicBlock *NewDFMember);
-
- bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,Loop *L);
- unsigned getLoopUnswitchCost(Loop *L, Value *LIC);
+ bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+ unsigned getLoopUnswitchCost(Value *LIC);
void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock);
void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
@@ -143,10 +146,13 @@
void RemoveBlockIfDead(BasicBlock *BB,
std::vector<Instruction*> &Worklist, Loop *l);
void RemoveLoopFromHierarchy(Loop *L);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
+ BasicBlock **LoopExit = 0);
+
};
- char LoopUnswitch::ID = 0;
- RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
}
+char LoopUnswitch::ID = 0;
+static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
LoopPass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
@@ -183,26 +189,27 @@
LPM = &LPM_Ref;
DF = getAnalysisToUpdate<DominanceFrontier>();
DT = getAnalysisToUpdate<DominatorTree>();
-
+ currentLoop = L;
bool Changed = false;
-
do {
+ assert(currentLoop->isLCSSAForm());
redoLoop = false;
- Changed |= processLoop(L);
+ Changed |= processCurrentLoop();
} while(redoLoop);
return Changed;
}
-/// processLoop - Do actual work and unswitch loop if possible and profitable.
-bool LoopUnswitch::processLoop(Loop *L) {
- assert(L->isLCSSAForm());
+/// processCurrentLoop - Do actual work and unswitch loop if possible
+/// and profitable.
+bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
I != E; ++I) {
TerminatorInst *TI = (*I)->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -211,15 +218,17 @@
if (BI->isConditional()) {
// See if this, or some part of it, is loop invariant. If so, we can
// unswitch on it if we desire.
- Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), L, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(),
- L)) {
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue())) {
++NumBranches;
return true;
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), L, Changed);
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
if (LoopCond && SI->getNumCases() > 1) {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
@@ -228,7 +237,7 @@
if (!UnswitchedVals.insert(UnswitchVal))
continue;
- if (UnswitchIfProfitable(LoopCond, UnswitchVal, L)) {
+ if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
++NumSwitches;
return true;
}
@@ -239,17 +248,15 @@
for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
BBI != E; ++BBI)
if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), L, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(),
- L)) {
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
+ ConstantInt::getTrue())) {
++NumSelects;
return true;
}
}
}
-
- assert(L->isLCSSAForm());
-
return Changed;
}
@@ -314,9 +321,9 @@
/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
/// Cond == Val.
///
-static bool IsTrivialUnswitchCondition(Loop *L, Value *Cond, Constant **Val = 0,
- BasicBlock **LoopExit = 0) {
- BasicBlock *Header = L->getHeader();
+bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
+ BasicBlock **LoopExit) {
+ BasicBlock *Header = currentLoop->getHeader();
TerminatorInst *HeaderTerm = Header->getTerminator();
BasicBlock *LoopExitBB = 0;
@@ -330,9 +337,11 @@
// latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
- if ((LoopExitBB = isTrivialLoopExitBlock(L, BI->getSuccessor(0)))) {
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
if (Val) *Val = ConstantInt::getTrue();
- } else if ((LoopExitBB = isTrivialLoopExitBlock(L, BI->getSuccessor(1)))) {
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
if (Val) *Val = ConstantInt::getFalse();
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
@@ -344,7 +353,8 @@
// side-effects. If so, determine the value of Cond that causes it to do
// this. Note that we can't trivially unswitch on the default case.
for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(L, SI->getSuccessor(i)))) {
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ SI->getSuccessor(i)))) {
// Okay, we found a trivial case, remember the value that is trivial.
if (Val) *Val = SI->getCaseValue(i);
break;
@@ -370,24 +380,25 @@
}
/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if
-/// we choose to unswitch the specified loop on the specified value.
+/// we choose to unswitch current loop on the specified value.
///
-unsigned LoopUnswitch::getLoopUnswitchCost(Loop *L, Value *LIC) {
+unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
// If the condition is trivial, always unswitch. There is no code growth for
// this case.
- if (IsTrivialUnswitchCondition(L, LIC))
+ if (IsTrivialUnswitchCondition(LIC))
return 0;
// FIXME: This is really overly conservative. However, more liberal
// estimations have thus far resulted in excessive unswitching, which is bad
// both in compile time and in code size. This should be replaced once
// someone figures out how to do a good estimation.
- return L->getBlocks().size();
+ return currentLoop->getBlocks().size();
unsigned Cost = 0;
// FIXME: this is brain dead. It should take into consideration code
// shrinkage.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
// Do not include empty blocks in the cost calculation. This happens due to
@@ -402,12 +413,12 @@
return Cost;
}
-/// UnswitchIfProfitable - We have found that we can unswitch L when
+/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
-bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,Loop *L){
- // Check to see if it would be profitable to unswitch this loop.
- unsigned Cost = getLoopUnswitchCost(L, LoopCond);
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
+ // Check to see if it would be profitable to unswitch current loop.
+ unsigned Cost = getLoopUnswitchCost(LoopCond);
// Do not do non-trivial unswitch while optimizing for size.
if (Cost && OptimizeForSize)
@@ -418,21 +429,27 @@
// resultant unswitched loops.
//
DOUT << "NOT unswitching loop %"
- << L->getHeader()->getName() << ", cost too high: "
- << L->getBlocks().size() << "\n";
+ << currentLoop->getHeader()->getName() << ", cost too high: "
+ << currentLoop->getBlocks().size() << "\n";
return false;
}
-
- // If this is a trivial condition to unswitch (which results in no code
- // duplication), do it now.
+
+ initLoopData();
+
Constant *CondVal;
BasicBlock *ExitBlock;
- if (IsTrivialUnswitchCondition(L, LoopCond, &CondVal, &ExitBlock)) {
- UnswitchTrivialCondition(L, LoopCond, CondVal, ExitBlock);
+ if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
} else {
- UnswitchNontrivialCondition(LoopCond, Val, L);
+ UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
}
-
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ Function *F = loopHeader->getParent();
+ if (DT)
+ DT->runOnFunction(*F);
+ if (DF)
+ DF->runOnFunction(*F);
return true;
}
@@ -449,87 +466,6 @@
}
}
-// CloneDomInfo - NewBB is cloned from Orig basic block. Now clone Dominator
-// Info.
-//
-// If Orig block's immediate dominator is mapped in VM then use corresponding
-// immediate dominator from the map. Otherwise Orig block's dominator is also
-// NewBB's dominator.
-//
-// OrigPreheader is loop pre-header before this pass started
-// updating CFG. NewPrehader is loops new pre-header. However, after CFG
-// manipulation, loop L may not exist. So rely on input parameter NewPreheader.
-void CloneDomInfo(BasicBlock *NewBB, BasicBlock *Orig,
- BasicBlock *NewPreheader, BasicBlock *OrigPreheader,
- BasicBlock *OrigHeader,
- DominatorTree *DT, DominanceFrontier *DF,
- DenseMap<const Value*, Value*> &VM) {
-
- // If NewBB alreay has found its place in domiantor tree then no need to do
- // anything.
- if (DT->getNode(NewBB))
- return;
-
- // If Orig does not have any immediate domiantor then its clone, NewBB, does
- // not need any immediate dominator.
- DomTreeNode *OrigNode = DT->getNode(Orig);
- if (!OrigNode)
- return;
- DomTreeNode *OrigIDomNode = OrigNode->getIDom();
- if (!OrigIDomNode)
- return;
-
- BasicBlock *OrigIDom = NULL;
-
- // If Orig is original loop header then its immediate dominator is
- // NewPreheader.
- if (Orig == OrigHeader)
- OrigIDom = NewPreheader;
-
- // If Orig is new pre-header then its immediate dominator is
- // original pre-header.
- else if (Orig == NewPreheader)
- OrigIDom = OrigPreheader;
-
- // Other as DT to find Orig's immediate dominator.
- else
- OrigIDom = OrigIDomNode->getBlock();
-
- // Initially use Orig's immediate dominator as NewBB's immediate dominator.
- BasicBlock *NewIDom = OrigIDom;
- DenseMap<const Value*, Value*>::iterator I = VM.find(OrigIDom);
- if (I != VM.end()) {
- NewIDom = cast<BasicBlock>(I->second);
-
- // If NewIDom does not have corresponding dominatore tree node then
- // get one.
- if (!DT->getNode(NewIDom))
- CloneDomInfo(NewIDom, OrigIDom, NewPreheader, OrigPreheader,
- OrigHeader, DT, DF, VM);
- }
-
- DT->addNewBlock(NewBB, NewIDom);
-
- // Copy cloned dominance frontiner set
- DominanceFrontier::DomSetType NewDFSet;
- if (DF) {
- DominanceFrontier::iterator DFI = DF->find(Orig);
- if ( DFI != DF->end()) {
- DominanceFrontier::DomSetType S = DFI->second;
- for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- DenseMap<const Value*, Value*>::iterator IDM = VM.find(BB);
- if (IDM != VM.end())
- NewDFSet.insert(cast<BasicBlock>(IDM->second));
- else
- NewDFSet.insert(BB);
- }
- }
- DF->addBasicBlock(NewBB, NewDFSet);
- }
-}
-
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
@@ -569,10 +505,8 @@
// Insert the new branch.
BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
-
}
-
/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
/// condition in it (a cond branch from its header block to its latch block,
/// where the path through the loop that doesn't execute its body has no
@@ -582,15 +516,14 @@
Constant *Val,
BasicBlock *ExitBlock) {
DOUT << "loop-unswitch: Trivial-Unswitch loop %"
- << L->getHeader()->getName() << " [" << L->getBlocks().size()
+ << loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function " << L->getHeader()->getParent()->getName()
<< " on cond: " << *Val << " == " << *Cond << "\n";
// First step, split the preheader, so that we know that there is a safe place
- // to insert the conditional branch. We will change 'OrigPH' to have a
+ // to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
- BasicBlock *OrigPH = L->getLoopPreheader();
- BasicBlock *NewPH = SplitEdge(OrigPH, L->getHeader(), this);
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
@@ -606,9 +539,9 @@
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
- OrigPH->getTerminator());
- LPM->deleteSimpleAnalysisValue(OrigPH->getTerminator(), L);
- OrigPH->getTerminator()->eraseFromParent();
+ loopPreheader->getTerminator());
+ LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
+ loopPreheader->getTerminator()->eraseFromParent();
// We need to reprocess this loop, it could be unswitched again.
redoLoop = true;
@@ -620,94 +553,52 @@
++NumTrivial;
}
-/// ReplaceLoopExternalDFMember -
-/// If BB's dominance frontier has a member that is not part of loop L then
-/// remove it. Add NewDFMember in BB's dominance frontier.
-void LoopUnswitch::ReplaceLoopExternalDFMember(Loop *L, BasicBlock *BB,
- BasicBlock *NewDFMember) {
-
- DominanceFrontier::iterator DFI = DF->find(BB);
- if (DFI == DF->end())
- return;
-
- DominanceFrontier::DomSetType &DFSet = DFI->second;
- for (DominanceFrontier::DomSetType::iterator DI = DFSet.begin(),
- DE = DFSet.end(); DI != DE;) {
- BasicBlock *B = *DI++;
- if (L->contains(B))
- continue;
-
- DF->removeFromFrontier(DFI, B);
- LoopDF.insert(B);
- }
-
- DF->addToFrontier(DFI, NewDFMember);
-}
-
/// SplitExitEdges - Split all of the edges from inside the loop to their exit
/// blocks. Update the appropriate Phi nodes as we do so.
void LoopUnswitch::SplitExitEdges(Loop *L,
- const SmallVector<BasicBlock *, 8> &ExitBlocks,
- SmallVector<BasicBlock *, 8> &MiddleBlocks) {
+ const SmallVector<BasicBlock *, 8> &ExitBlocks)
+{
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock));
for (unsigned j = 0, e = Preds.size(); j != e; ++j) {
- BasicBlock* MiddleBlock = SplitEdge(Preds[j], ExitBlock, this);
- MiddleBlocks.push_back(MiddleBlock);
+ BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this);
BasicBlock* StartBlock = Preds[j];
BasicBlock* EndBlock;
- if (MiddleBlock->getSinglePredecessor() == ExitBlock) {
- EndBlock = MiddleBlock;
-        MiddleBlock = EndBlock->getSinglePredecessor();
+ if (NewExitBlock->getSinglePredecessor() == ExitBlock) {
+ EndBlock = NewExitBlock;
+        NewExitBlock = EndBlock->getSinglePredecessor();
} else {
EndBlock = ExitBlock;
}
- OrigLoopExitMap[StartBlock] = EndBlock;
-
std::set<PHINode*> InsertedPHIs;
PHINode* OldLCSSA = 0;
for (BasicBlock::iterator I = EndBlock->begin();
(OldLCSSA = dyn_cast<PHINode>(I)); ++I) {
- Value* OldValue = OldLCSSA->getIncomingValueForBlock(MiddleBlock);
+ Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock);
PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(),
OldLCSSA->getName() + ".us-lcssa",
- MiddleBlock->getTerminator());
+ NewExitBlock->getTerminator());
NewLCSSA->addIncoming(OldValue, StartBlock);
- OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(MiddleBlock),
+ OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock),
NewLCSSA);
InsertedPHIs.insert(NewLCSSA);
}
- BasicBlock::iterator InsertPt = EndBlock->begin();
- while (dyn_cast<PHINode>(InsertPt)) ++InsertPt;
- for (BasicBlock::iterator I = MiddleBlock->begin();
+ BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI();
+ for (BasicBlock::iterator I = NewExitBlock->begin();
(OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0;
++I) {
PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(),
OldLCSSA->getName() + ".us-lcssa",
InsertPt);
OldLCSSA->replaceAllUsesWith(NewLCSSA);
- NewLCSSA->addIncoming(OldLCSSA, MiddleBlock);
+ NewLCSSA->addIncoming(OldLCSSA, NewExitBlock);
}
- if (DF && DT) {
- // StartBlock -- > MiddleBlock -- > EndBlock
- // StartBlock is loop exiting block. EndBlock will become merge point
- // of two loop exits after loop unswitch.
-
- // If StartBlock's DF member includes a block that is not loop member
- // then replace that DF member with EndBlock.
-
- // If MiddleBlock's DF member includes a block that is not loop member
-        // then replace that DF member with EndBlock.
-
- ReplaceLoopExternalDFMember(L, StartBlock, EndBlock);
- ReplaceLoopExternalDFMember(L, MiddleBlock, EndBlock);
- }
}
}
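
SplitEdge itself is conceptually simple: a fresh block is interposed on each
loop-to-exit edge so the PHI rewiring above has a dedicated place to live. An
abstract sketch on an adjacency-list CFG (a hypothetical representation, not
LLVM's):

#include <vector>

struct Block { std::vector<int> Succs; };

// Insert a fresh block M on the edge P -> E, yielding P -> M -> E.
static int splitEdge(std::vector<Block> &CFG, int P, int E) {
  int M = (int)CFG.size();
  CFG.push_back(Block{{E}});   // M's only successor is E
  for (int &S : CFG[P].Succs)
    if (S == E)
      S = M;                   // reroute P's edge through M
  return M;
}
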
@@ -718,22 +609,18 @@
/// condition outside of either loop. Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L) {
- Function *F = L->getHeader()->getParent();
+ Function *F = loopHeader->getParent();
DOUT << "loop-unswitch: Unswitching loop %"
- << L->getHeader()->getName() << " [" << L->getBlocks().size()
+ << loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n";
- // LoopBlocks contains all of the basic blocks of the loop, including the
- // preheader of the loop, the body of the loop, and the exit blocks of the
- // loop, in that order.
- std::vector<BasicBlock*> LoopBlocks;
+ LoopBlocks.clear();
+ NewBlocks.clear();
// First step, split the preheader and exit blocks, and add these blocks to
// the LoopBlocks list.
- BasicBlock *OrigHeader = L->getHeader();
- BasicBlock *OrigPreheader = L->getLoopPreheader();
- BasicBlock *NewPreheader = SplitEdge(OrigPreheader, L->getHeader(), this);
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
@@ -744,8 +631,7 @@
// Split all of the edges from inside the loop to their exit blocks. Update
// the appropriate Phi nodes as we do so.
- SmallVector<BasicBlock *,8> MiddleBlocks;
- SplitExitEdges(L, ExitBlocks, MiddleBlocks);
+ SplitExitEdges(L, ExitBlocks);
// The exit blocks may have been changed due to edge splitting, recompute.
ExitBlocks.clear();
@@ -757,7 +643,6 @@
// Next step, clone all of the basic blocks that make up the loop (including
// the loop preheader and exit blocks), keeping track of the mapping between
// the instructions and blocks.
- std::vector<BasicBlock*> NewBlocks;
NewBlocks.reserve(LoopBlocks.size());
DenseMap<const Value*, Value*> ValueMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
@@ -767,21 +652,6 @@
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L);
}
-  // OutSiders are basic blocks that are dominated by original header and
- // at the same time they are not part of loop.
- SmallPtrSet<BasicBlock *, 8> OutSiders;
- if (DT) {
- DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
- for(std::vector<DomTreeNode*>::iterator DI = OrigHeaderNode->begin(),
- DE = OrigHeaderNode->end(); DI != DE; ++DI) {
- BasicBlock *B = (*DI)->getBlock();
-
- DenseMap<const Value*, Value*>::iterator VI = ValueMap.find(B);
- if (VI == ValueMap.end())
- OutSiders.insert(B);
- }
- }
-
// Splice the newly inserted blocks into the function right before the
// original preheader.
F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(),
@@ -805,7 +675,7 @@
assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
"Exit block should have been split to have one successor!");
BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
-
+
// If the successor of the exit block had PHI nodes, add an entry for
// NewExit.
PHINode *PN;
@@ -819,17 +689,13 @@
}
// Rewrite the code to refer to itself.
- for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
- BasicBlock *NB = NewBlocks[i];
- if (BasicBlock *UnwindDest = NB->getUnwindDest())
- NB->setUnwindDest(cast<BasicBlock>(ValueMap[UnwindDest]));
-
- for (BasicBlock::iterator I = NB->begin(), E = NB->end(); I != E; ++I)
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I)
RemapInstruction(I, ValueMap);
- }
// Rewrite the original preheader to select between versions of the loop.
- BranchInst *OldBR = cast<BranchInst>(OrigPreheader->getTerminator());
+ BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
"Preheader splitting did not work correctly!");
@@ -838,94 +704,6 @@
LPM->deleteSimpleAnalysisValue(OldBR, L);
OldBR->eraseFromParent();
- // Update dominator info
- if (DF && DT) {
-
- SmallVector<BasicBlock *,4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
-
- // Clone dominator info for all cloned basic block.
- for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
- BasicBlock *LBB = LoopBlocks[i];
- BasicBlock *NBB = NewBlocks[i];
- CloneDomInfo(NBB, LBB, NewPreheader, OrigPreheader,
- OrigHeader, DT, DF, ValueMap);
-
- // If LBB's dominance frontier includes DFMember
- // such that DFMember is also a member of LoopDF then
- // - Remove DFMember from LBB's dominance frontier
-      //    - Copy the dominance frontier members of loop exiting blocks
-      //      that are dominated by LBB into LBB's dominance frontier
-
- DominanceFrontier::iterator LBBI = DF->find(LBB);
- DominanceFrontier::iterator NBBI = DF->find(NBB);
- if (LBBI == DF->end())
- continue;
-
- DominanceFrontier::DomSetType &LBSet = LBBI->second;
- for (DominanceFrontier::DomSetType::iterator LI = LBSet.begin(),
- LE = LBSet.end(); LI != LE; /* NULL */) {
- BasicBlock *B = *LI++;
- if (B == LBB && B == L->getHeader())
- continue;
- bool removeB = false;
- if (!LoopDF.count(B))
- continue;
-
- // If LBB dominates loop exits then insert loop exit block's DF
- // into B's DF.
- for(SmallVector<BasicBlock *, 4>::iterator
- LExitI = ExitingBlocks.begin(),
- LExitE = ExitingBlocks.end(); LExitI != LExitE; ++LExitI) {
- BasicBlock *E = *LExitI;
-
- if (!DT->dominates(LBB,E))
- continue;
-
- DenseMap<BasicBlock *, BasicBlock *>::iterator DFBI =
- OrigLoopExitMap.find(E);
- if (DFBI == OrigLoopExitMap.end())
- continue;
-
- BasicBlock *DFB = DFBI->second;
- DF->addToFrontier(LBBI, DFB);
- DF->addToFrontier(NBBI, DFB);
- removeB = true;
- }
-
- // If B's replacement is inserted in DF then now is the time to remove
- // B.
- if (removeB) {
- DF->removeFromFrontier(LBBI, B);
- if (L->contains(B))
- DF->removeFromFrontier(NBBI, cast<BasicBlock>(ValueMap[B]));
- else
- DF->removeFromFrontier(NBBI, B);
- }
- }
-
- }
-
- // MiddleBlocks are dominated by original pre header. SplitEdge updated
- // MiddleBlocks' dominance frontier appropriately.
- for (unsigned i = 0, e = MiddleBlocks.size(); i != e; ++i) {
- BasicBlock *MBB = MiddleBlocks[i];
- if (!MBB->getSinglePredecessor())
- DT->changeImmediateDominator(MBB, OrigPreheader);
- }
-
- // All Outsiders are now dominated by original pre header.
- for (SmallPtrSet<BasicBlock *, 8>::iterator OI = OutSiders.begin(),
- OE = OutSiders.end(); OI != OE; ++OI) {
- BasicBlock *OB = *OI;
- DT->changeImmediateDominator(OB, OrigPreheader);
- }
-
- // New loop headers are dominated by original preheader
- DT->changeImmediateDominator(NewBlocks[0], OrigPreheader);
- DT->changeImmediateDominator(LoopBlocks[0], OrigPreheader);
- }
-
LoopProcessWorklist.push_back(NewLoop);
redoLoop = true;
@@ -937,6 +715,7 @@
// deleted. If so, don't simplify it.
if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop)
RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true);
+
}
/// RemoveFromWorklist - Remove all instances of I from the worklist vector
@@ -994,7 +773,7 @@
// If this is the header of a loop and the only pred is the latch, we now
// have an unreachable loop.
if (Loop *L = LI->getLoopFor(BB))
- if (L->getHeader() == BB && L->contains(Pred)) {
+ if (loopHeader == BB && L->contains(Pred)) {
// Remove the branch from the latch to the header block, this makes
// the header dead, which will make the latch dead (because the header
// dominates the latch).
@@ -1090,8 +869,6 @@
RemoveLoopFromWorklist(L);
}
-
-
// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
// the value specified by Val in the specified loop, or we know it does NOT have
// that value. Rewrite any uses of LIC or of properties correlated to it.
@@ -1153,18 +930,19 @@
// trying to update it is complicated. So instead we preserve the
      // loop structure and put the block on a dead code path.
+ BasicBlock *SISucc = SI->getSuccessor(i);
BasicBlock* Old = SI->getParent();
BasicBlock* Split = SplitBlock(Old, SI, this);
Instruction* OldTerm = Old->getTerminator();
- BranchInst::Create(Split, SI->getSuccessor(i),
+ BranchInst::Create(Split, SISucc,
ConstantInt::getTrue(), OldTerm);
LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L);
Old->getTerminator()->eraseFromParent();
PHINode *PN;
- for (BasicBlock::iterator II = SI->getSuccessor(i)->begin();
+ for (BasicBlock::iterator II = SISucc->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
Value *InVal = PN->removeIncomingValue(Split, false);
PN->addIncoming(InVal, Old);
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/MemCpyOptimizer.cpp Sun Jul 6 15:45:41 2008
@@ -14,23 +14,14 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/ParameterAttributes.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
@@ -40,13 +31,6 @@
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
-namespace {
- cl::opt<bool>
- FormMemSet("form-memset-from-stores",
- cl::desc("Transform straight-line stores to memsets"),
- cl::init(true), cl::Hidden);
-}
-
/// isBytewiseValue - If the specified value can be set by repeating the same
/// byte in memory, return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
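The test is cheap to state on a host integer: a 32-bit value is memset-able
exactly when it is one byte splatted four times. A standalone illustration
(hypothetical helper, not the pass's isBytewiseValue):

#include <cstdint>

// Return the repeated byte, or -1 if the value is not a single splat byte.
static int repeatedByte(uint32_t V) {
  uint32_t B = V & 0xFF;
  if (B * 0x01010101u == V)   // e.g. 0x00000000, 0xFFFFFFFF, 0x2A2A2A2A
    return (int)B;            // the byte a memset would store
  return -1;                  // bytes differ: not expressible as a memset
}
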
@@ -332,13 +316,9 @@
}
    // Helper functions
- bool processInstruction(Instruction* I,
- SmallVectorImpl<Instruction*> &toErase);
- bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase);
- bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
- SmallVectorImpl<Instruction*> &toErase);
- bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
- SmallVectorImpl<Instruction*> &toErase);
+ bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
+ bool processMemCpy(MemCpyInst* M);
+ bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
bool iterateOnFunction(Function &F);
};
@@ -357,8 +337,7 @@
/// some other patterns to fold away. In particular, this looks for stores to
/// neighboring locations of memory. If it sees enough consecutive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
- if (!FormMemSet) return false;
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
if (SI->isVolatile()) return false;
// There are two cases that are interesting for this code to handle: memcpy
@@ -473,8 +452,13 @@
cerr << *Range.TheStores[i];
cerr << "With: " << *C); C=C;
+ // Don't invalidate the iterator
+ BBI = BI;
+
// Zap all the stores.
- toErase.append(Range.TheStores.begin(), Range.TheStores.end());
+ for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(),
+ SE = Range.TheStores.end(); SI != SE; ++SI)
+ (*SI)->eraseFromParent();
++NumMemSetInfer;
MadeChange = true;
}
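
The store-merging logic boils down to range bookkeeping: sort the byte ranges
written by nearby stores and fire once a contiguous run is long enough. A
simplified standalone sketch of that decision, assuming the threshold of 4
stores mentioned in the comment above:

#include <algorithm>
#include <cstddef>
#include <vector>

struct StoreRange { long Off, Size; };

static bool worthMemset(std::vector<StoreRange> Stores) {
  std::sort(Stores.begin(), Stores.end(),
            [](const StoreRange &A, const StoreRange &B) {
              return A.Off < B.Off;
            });
  unsigned Run = 1;
  for (std::size_t i = 1; i < Stores.size(); ++i) {
    if (Stores[i].Off == Stores[i-1].Off + Stores[i-1].Size)
      ++Run;          // contiguous with the previous store
    else
      Run = 1;        // gap: start a new run
    if (Run >= 4)
      return true;    // long enough to be worth a memset
  }
  return false;
}
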
@@ -486,8 +470,7 @@
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
- SmallVectorImpl<Instruction*> &toErase) {
+bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -571,10 +554,17 @@
User* UI = srcUseList.back();
srcUseList.pop_back();
- if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+ if (isa<BitCastInst>(UI)) {
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
I != E; ++I)
srcUseList.push_back(*I);
+ } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (G->hasAllZeroIndices())
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ else
+ return false;
} else if (UI != C && UI != cpy) {
return false;
}
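
To recap what performCallSlotOptzn is after, here is the transformation at the
C++ source level; Buf and func are hypothetical stand-ins, and the pass does
this on IR only after the legality checks in the surrounding hunks succeed:

#include <cstring>

struct Buf { char Data[64]; };

static void func(Buf *Out) { std::memset(Out->Data, 42, sizeof(Out->Data)); }

void beforeOpt(Buf &Dst) {
  Buf Tmp;
  func(&Tmp);                            // callee writes its result into Tmp
  std::memcpy(&Dst, &Tmp, sizeof(Tmp));  // the result is then copied to Dst
}

void afterOpt(Buf &Dst) {
  func(&Dst);  // write in place: the temporary and the memcpy disappear
}
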
@@ -597,14 +587,23 @@
return false;
// All the checks have passed, so do the transformation.
+ bool changedArgument = false;
for (unsigned i = 0; i < CS.arg_size(); ++i)
- if (CS.getArgument(i) == cpySrc) {
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
if (cpySrc->getType() != cpyDest->getType())
- cpyDest = CastInst::createPointerCast(cpyDest, cpySrc->getType(),
+ cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
cpyDest->getName(), C);
- CS.setArgument(i, cpyDest);
+ changedArgument = true;
+ if (CS.getArgument(i)->getType() != cpyDest->getType())
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.getArgument(i)->getType(), cpyDest->getName(), C));
+ else
+ CS.setArgument(i, cpyDest);
}
+ if (!changedArgument)
+ return false;
+
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
@@ -612,7 +611,8 @@
// Remove the memcpy
MD.removeInstruction(cpy);
- toErase.push_back(cpy);
+ cpy->eraseFromParent();
+ NumMemCpyInstr++;
return true;
}
@@ -621,8 +621,25 @@
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
/// This allows later passes to remove the first memcpy altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
- SmallVectorImpl<Instruction*> &toErase) {
+bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+  // There are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundancy for DSE
+ // b) call-memcpy xform for return slot optimization
+ Instruction* dep = MD.getDependency(M);
+ if (dep == MemoryDependenceAnalysis::None ||
+ dep == MemoryDependenceAnalysis::NonLocal)
+ return false;
+ else if (!isa<MemCpyInst>(dep)) {
+ if (CallInst* C = dyn_cast<CallInst>(dep))
+ return performCallSlotOptzn(M, C);
+ else
+ return false;
+ }
+
+ MemCpyInst* MDep = cast<MemCpyInst>(dep);
+
  // We can only transform memcpys where the dest of one is the source of the
  // other
if (M->getSource() != MDep->getDest())
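
The memcpy-memcpy transform described above, viewed at the source level
(hypothetical example; only valid when the buffers do not overlap and nothing
clobbers Y between the two copies):

#include <cstddef>
#include <cstring>

void beforeOpt(char *X, char *Y, char *Z, std::size_t N) {
  std::memcpy(Y, X, N);   // copy A: X -> Y
  std::memcpy(Z, Y, N);   // copy B: Y -> Z, depends on A
}

void afterOpt(char *X, char *Y, char *Z, std::size_t N) {
  std::memcpy(Y, X, N);   // copy A: now dead if Y has no other readers
  std::memcpy(Z, X, N);   // copy B rewritten to read the original source
}
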
@@ -667,41 +684,22 @@
CallInst* C = CallInst::Create(MemCpyFun, args.begin(), args.end(), "", M);
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ // If C and M don't interfere, then this is a valid transformation. If they
+ // did, this would mean that the two sources overlap, which would be bad.
if (MD.getDependency(C) == MDep) {
MD.dropInstruction(M);
- toErase.push_back(M);
+ M->eraseFromParent();
+
+ NumMemCpyInstr++;
+
return true;
}
+ // Otherwise, there was no point in doing this, so we remove the call we
+ // inserted and act like nothing happened.
MD.removeInstruction(C);
- toErase.push_back(C);
- return false;
-}
-
-/// processInstruction - When calculating availability, handle an instruction
-/// by inserting it into the appropriate sets
-bool MemCpyOpt::processInstruction(Instruction *I,
- SmallVectorImpl<Instruction*> &toErase) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return processStore(SI, toErase);
-
- if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-
-    // There are two possible optimizations we can do for memcpy:
-    //   a) memcpy-memcpy xform which exposes redundancy for DSE
- // b) call-memcpy xform for return slot optimization
- Instruction* dep = MD.getDependency(M);
- if (dep == MemoryDependenceAnalysis::None ||
- dep == MemoryDependenceAnalysis::NonLocal)
- return false;
- if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
- return processMemCpy(M, MemCpy, toErase);
- if (CallInst* C = dyn_cast<CallInst>(dep))
- return performCallSlotOptzn(M, C, toErase);
- return false;
- }
+ C->eraseFromParent();
return false;
}
@@ -726,42 +724,19 @@
// MemCpyOpt::iterateOnFunction - Executes one iteration of MemCpyOpt
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool changed_function = false;
-
- DominatorTree &DT = getAnalysis<DominatorTree>();
-
- SmallVector<Instruction*, 8> toErase;
-
- // Top-down walk of the dominator tree
- for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
- E = df_end(DT.getRootNode()); DI != E; ++DI) {
- BasicBlock* BB = DI->getBlock();
+  // Walk all instructions in the function
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- changed_function |= processInstruction(BI, toErase);
- if (toErase.empty()) {
- ++BI;
- continue;
- }
-
- // If we need some instructions deleted, do it now.
- NumMemCpyInstr += toErase.size();
+ // Avoid invalidating the iterator
+ Instruction* I = BI++;
- // Avoid iterator invalidation.
- bool AtStart = BI == BB->begin();
- if (!AtStart)
- --BI;
-
- for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
- E = toErase.end(); I != E; ++I)
- (*I)->eraseFromParent();
-
- if (AtStart)
- BI = BB->begin();
- else
- ++BI;
-
- toErase.clear();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ changed_function |= processStore(SI, BI);
+ else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+ changed_function |= processMemCpy(M);
+ }
}
}
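
The `Instruction* I = BI++;` idiom above is the standard way to erase while
iterating: the iterator advances before the current element can be freed. The
same pattern on a plain std::list, purely for illustration:

#include <list>

static void eraseOdd(std::list<int> &L) {
  for (std::list<int>::iterator It = L.begin(), E = L.end(); It != E;) {
    std::list<int>::iterator Cur = It++;  // advance first
    if (*Cur % 2 != 0)
      L.erase(Cur);                       // safe: It already moved past Cur
  }
}
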
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/PredicateSimplifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/PredicateSimplifier.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/PredicateSimplifier.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/PredicateSimplifier.cpp Sun Jul 6 15:45:41 2008
@@ -1509,7 +1509,7 @@
}
// We'd like to allow makeEqual on two values to perform a simple
- // substitution without every creating nodes in the IG whenever possible.
+ // substitution without creating nodes in the IG whenever possible.
//
// The first iteration through this loop operates on V2 before going
// through the Remove list and operating on those too. If all of the
@@ -1594,6 +1594,7 @@
if (mergeIGNode) {
// Create N1.
if (!n1) n1 = VN.getOrInsertVN(V1, Top);
+ IG.node(n1); // Ensure that IG.Nodes won't get resized
// Migrate relationships from removed nodes to N1.
for (SetVector<unsigned>::iterator I = Remove.begin(), E = Remove.end();
@@ -2646,12 +2647,12 @@
}
}
}
-
- char PredicateSimplifier::ID = 0;
- RegisterPass<PredicateSimplifier> X("predsimplify",
- "Predicate Simplifier");
}
+char PredicateSimplifier::ID = 0;
+static RegisterPass<PredicateSimplifier>
+X("predsimplify", "Predicate Simplifier");
+
FunctionPass *llvm::createPredicateSimplifierPass() {
return new PredicateSimplifier();
}
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/Reassociate.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/Reassociate.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/Reassociate.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/Reassociate.cpp Sun Jul 6 15:45:41 2008
@@ -64,7 +64,7 @@
<< "," << Ops[i].Rank;
}
-namespace {
+namespace {
class VISIBILITY_HIDDEN Reassociate : public FunctionPass {
std::map<BasicBlock*, unsigned> RankMap;
std::map<Value*, unsigned> ValueRankMap;
@@ -92,11 +92,11 @@
void RemoveDeadBinaryOp(Value *V);
};
-
- char Reassociate::ID = 0;
- RegisterPass<Reassociate> X("reassociate", "Reassociate expressions");
}
+char Reassociate::ID = 0;
+static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions");
+
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
@@ -194,7 +194,7 @@
static Instruction *LowerNegateToMultiply(Instruction *Neg) {
Constant *Cst = ConstantInt::getAllOnesValue(Neg->getType());
- Instruction *Res = BinaryOperator::createMul(Neg->getOperand(1), Cst, "",Neg);
+ Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
Neg->eraseFromParent();
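
The rewrite is justified by ordinary two's complement arithmetic: negation is
multiplication by the all-ones constant. A self-contained check of the
identity (illustrative only; unsigned math avoids signed-overflow UB):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Vals[] = {0, 1, -7, 123456, INT32_MIN};
  for (int32_t X : Vals)
    assert(-(uint32_t)X == (uint32_t)X * (uint32_t)-1);
  return 0;
}
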
@@ -389,7 +389,7 @@
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
//
- return BinaryOperator::createNeg(V, V->getName() + ".neg", BI);
+ return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
}
/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
@@ -427,7 +427,7 @@
//
Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
Instruction *New =
- BinaryOperator::createAdd(Sub->getOperand(0), NegVal, "", Sub);
+ BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
New->takeName(Sub);
// Everyone now refers to the add instruction.
@@ -451,7 +451,7 @@
Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
- Instruction *Mul = BinaryOperator::createMul(Shl->getOperand(0), MulCst,
+ Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
"", Shl);
Mul->takeName(Shl);
Shl->replaceAllUsesWith(Mul);
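
Likewise, the shl-to-mul rewrite leans on x << c == x * (1 << c) for unsigned
x; a quick standalone check:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 0xDEADBEEFu};
  for (uint32_t X : Vals)
    for (unsigned C = 0; C < 32; ++C)
      assert((X << C) == X * (uint32_t(1) << C));
  return 0;
}
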
@@ -485,7 +485,7 @@
Value *V1 = Ops.back();
Ops.pop_back();
Value *V2 = EmitAddTreeOfValues(I, Ops);
- return BinaryOperator::createAdd(V2, V1, "tmp", I);
+ return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
}
/// RemoveFactorFromExpression - If V is an expression tree that is a
@@ -714,7 +714,7 @@
// this, we could otherwise run into situations where removing a factor
// from an expression will drop a use of maxocc, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
- Instruction *DummyInst = BinaryOperator::createAdd(MaxOccVal, MaxOccVal);
+ Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
std::vector<Value*> NewMulOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
@@ -729,7 +729,7 @@
unsigned NumAddedValues = NewMulOps.size();
Value *V = EmitAddTreeOfValues(I, NewMulOps);
- Value *V2 = BinaryOperator::createMul(V, MaxOccVal, "tmp", I);
+ Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
// Now that we have inserted V and its sole use, optimize it. This allows
// us to handle cases that require multiple factoring steps, such as this:
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/Reg2Mem.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/Reg2Mem.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/Reg2Mem.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/Reg2Mem.cpp Sun Jul 6 15:45:41 2008
@@ -67,7 +67,7 @@
while (isa<AllocaInst>(I)) ++I;
CastInst *AllocaInsertionPoint =
- CastInst::create(Instruction::BitCast,
+ CastInst::Create(Instruction::BitCast,
Constant::getNullValue(Type::Int32Ty), Type::Int32Ty,
"reg2mem alloca point", I);
@@ -111,14 +111,15 @@
return false;
}
};
-
- char RegToMem::ID = 0;
- RegisterPass<RegToMem> X("reg2mem", "Demote all values to stack slots");
}
+
+char RegToMem::ID = 0;
+static RegisterPass<RegToMem>
+X("reg2mem", "Demote all values to stack slots");
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
//
-const PassInfo *llvm::DemoteRegisterToMemoryID = X.getPassInfo();
+const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
return new RegToMem();
}
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/SCCP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/SCCP.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/SCCP.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/SCCP.cpp Sun Jul 6 15:45:41 2008
@@ -29,6 +29,7 @@
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
@@ -130,21 +131,6 @@
}
};
-/// LatticeValIndexed - LatticeVal and associated Index. This is used
-/// to track individual operand Lattice values for multi-value ret instructions.
-class VISIBILITY_HIDDEN LatticeValIndexed {
- public:
- LatticeValIndexed(unsigned I = 0) { Index = I; }
- LatticeVal &getLatticeVal() { return LV; }
- unsigned getIndex() const { return Index; }
-
- void setLatticeVal(LatticeVal &L) { LV = L; }
- void setIndex(unsigned I) { Index = I; }
-
- private:
- LatticeVal LV;
- unsigned Index;
-};
//===----------------------------------------------------------------------===//
//
/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
@@ -167,7 +153,7 @@
/// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
/// that return multiple values.
- std::multimap<Function*, LatticeValIndexed> TrackedMultipleRetVals;
+ std::map<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
// The reason for two worklists is that overdefined is the lowest state
// on the lattice, and moving things to overdefined as fast as possible
@@ -194,7 +180,7 @@
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
void MarkBlockExecutable(BasicBlock *BB) {
- DOUT << "Marking Block Executable: " << BB->getName() << "\n";
+ DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
BBExecutable.insert(BB); // Basic block is executable!
BBWorkList.push_back(BB); // Add the block to the work list!
}
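
For readers new to SCCP: every value sits on a three-point lattice and only
ever moves downward, which is what bounds the solver's work. A minimal sketch
of the lattice and its merge rule (hypothetical, far simpler than the real
LatticeVal class):

enum State { Undefined, Constant, Overdefined };

struct SimpleLatticeVal {
  State S;
  int C;  // meaningful only when S == Constant

  // Merge another value in, moving monotonically down the lattice.
  void mergeIn(const SimpleLatticeVal &O) {
    if (O.S == Undefined || S == Overdefined)
      return;              // nothing to learn, or already at bottom
    if (S == Undefined) {
      *this = O;           // adopt the incoming state
      return;
    }
    if (O.S == Overdefined || O.C != C)
      S = Overdefined;     // disagreeing constants fall to bottom
  }
};
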
@@ -220,11 +206,10 @@
// Add an entry, F -> undef.
if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- TrackedMultipleRetVals.insert(std::pair<Function *, LatticeValIndexed>
- (F, LatticeValIndexed(i)));
- }
- else
- TrackedRetVals[F];
+ TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
+ LatticeVal()));
+ } else
+ TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
}
/// Solve - Solve for constants and executable blocks.
@@ -291,7 +276,6 @@
// markOverdefined - Make a value be marked as "overdefined". If the
// value is not already overdefined, add it to the overdefined instruction
// work list so that the users of the instruction are updated later.
-
inline void markOverdefined(LatticeVal &IV, Value *V) {
if (IV.markOverdefined()) {
DEBUG(DOUT << "markOverdefined: ";
@@ -351,8 +335,8 @@
return; // This edge is already known to be executable!
if (BBExecutable.count(Dest)) {
- DOUT << "Marking Edge Executable: " << Source->getName()
- << " -> " << Dest->getName() << "\n";
+ DOUT << "Marking Edge Executable: " << Source->getNameStart()
+ << " -> " << Dest->getNameStart() << "\n";
// The destination is already executable, but we just made an edge
// feasible that wasn't before. Revisit the PHI nodes in the block
@@ -407,6 +391,8 @@
void visitExtractElementInst(ExtractElementInst &I);
void visitInsertElementInst(InsertElementInst &I);
void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
// Instructions that cannot be folded away...
void visitStoreInst (Instruction &I);
@@ -465,20 +451,8 @@
(SCValue.isConstant() && !isa<ConstantInt>(SCValue.getConstant()))) {
// All destinations are executable!
Succs.assign(TI.getNumSuccessors(), true);
- } else if (SCValue.isConstant()) {
- Constant *CPV = SCValue.getConstant();
- // Make sure to skip the "default value" which isn't a value
- for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i) {
- if (SI->getSuccessorValue(i) == CPV) {// Found the right branch...
- Succs[i] = true;
- return;
- }
- }
-
- // Constant value not equal to any of the branches... must execute
- // default branch then...
- Succs[0] = true;
- }
+ } else if (SCValue.isConstant())
+ Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;
} else {
assert(0 && "SCCP: Don't know how to handle this terminator!");
}
@@ -640,7 +614,7 @@
if (!F->hasInternalLinkage())
return;
- if (!TrackedRetVals.empty()) {
+ if (!TrackedRetVals.empty() && I.getNumOperands() == 1) {
DenseMap<Function*, LatticeVal>::iterator TFRVI =
TrackedRetVals.find(F);
if (TFRVI != TrackedRetVals.end() &&
@@ -651,15 +625,24 @@
}
}
- // Handle function that returns multiple values.
- std::multimap<Function*, LatticeValIndexed>::iterator It, E;
- tie(It, E) = TrackedMultipleRetVals.equal_range(F);
- if (It != E) {
- for (; It != E; ++It) {
- LatticeValIndexed &LV = It->second;
- unsigned Idx = LV.getIndex();
- Value *V = I.getOperand(Idx);
- mergeInValue(LV.getLatticeVal(), V, getValueState(V));
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty() && I.getNumOperands() > 1) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ std::map<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, i));
+ if (It == TrackedMultipleRetVals.end()) break;
+ mergeInValue(It->second, F, getValueState(I.getOperand(i)));
+ }
+ } else if (!TrackedMultipleRetVals.empty() &&
+ I.getNumOperands() == 1 &&
+ isa<StructType>(I.getOperand(0)->getType())) {
+ for (unsigned i = 0, e = I.getOperand(0)->getType()->getNumContainedTypes();
+ i != e; ++i) {
+ std::map<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, i));
+ if (It == TrackedMultipleRetVals.end()) break;
+ Value *Val = FindInsertedValue(I.getOperand(0), i);
+ mergeInValue(It->second, F, getValueState(Val));
}
}
}
@@ -687,28 +670,124 @@
}
void SCCPSolver::visitGetResultInst(GetResultInst &GRI) {
- unsigned Idx = GRI.getIndex();
Value *Aggr = GRI.getOperand(0);
- Function *F = NULL;
+
+ // If the operand to the getresult is an undef, the result is undef.
+ if (isa<UndefValue>(Aggr))
+ return;
+
+ Function *F;
+ if (CallInst *CI = dyn_cast<CallInst>(Aggr))
+ F = CI->getCalledFunction();
+ else
+ F = cast<InvokeInst>(Aggr)->getCalledFunction();
+
+ // TODO: If IPSCCP resolves the callee of this function, we could propagate a
+ // result back!
+ if (F == 0 || TrackedMultipleRetVals.empty()) {
+ markOverdefined(&GRI);
+ return;
+ }
+
+ // See if we are tracking the result of the callee.
+ std::map<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, GRI.getIndex()));
+
+ // If not tracking this function (for example, it is a declaration) just move
+ // to overdefined.
+ if (It == TrackedMultipleRetVals.end()) {
+ markOverdefined(&GRI);
+ return;
+ }
+
+ // Otherwise, the value will be merged in here as a result of CallSite
+ // handling.
+}
+
+void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
+ Value *Aggr = EVI.getAggregateOperand();
+
+ // If the operand to the extractvalue is an undef, the result is undef.
+ if (isa<UndefValue>(Aggr))
+ return;
+
+ // Currently only handle single-index extractvalues.
+ if (EVI.getNumIndices() != 1) {
+ markOverdefined(&EVI);
+ return;
+ }
+
+ Function *F = 0;
if (CallInst *CI = dyn_cast<CallInst>(Aggr))
F = CI->getCalledFunction();
else if (InvokeInst *II = dyn_cast<InvokeInst>(Aggr))
F = II->getCalledFunction();
- if (!F)
+ // TODO: If IPSCCP resolves the callee of this function, we could propagate a
+ // result back!
+ if (F == 0 || TrackedMultipleRetVals.empty()) {
+ markOverdefined(&EVI);
+ return;
+ }
+
+ // See if we are tracking the result of the callee.
+ std::map<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, *EVI.idx_begin()));
+
+ // If not tracking this function (for example, it is a declaration) just move
+ // to overdefined.
+ if (It == TrackedMultipleRetVals.end()) {
+ markOverdefined(&EVI);
+ return;
+ }
+
+ // Otherwise, the value will be merged in here as a result of CallSite
+ // handling.
+}
+
+void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
+ Value *Aggr = IVI.getAggregateOperand();
+ Value *Val = IVI.getInsertedValueOperand();
+
+ // If the operands to the insertvalue are undef, the result is undef.
+ if (isa<UndefValue>(Aggr) && isa<UndefValue>(Val))
return;
- std::multimap<Function*, LatticeValIndexed>::iterator It, E;
- tie(It, E) = TrackedMultipleRetVals.equal_range(F);
- if (It == E)
+ // Currently only handle single-index insertvalues.
+ if (IVI.getNumIndices() != 1) {
+ markOverdefined(&IVI);
return;
+ }
- for (; It != E; ++It) {
- LatticeValIndexed &LIV = It->second;
- if (LIV.getIndex() == Idx) {
- mergeInValue(&GRI, LIV.getLatticeVal());
+ // Currently only handle insertvalue instructions that are in a single-use
+ // chain that builds up a return value.
+ for (const InsertValueInst *TmpIVI = &IVI; ; ) {
+ if (!TmpIVI->hasOneUse()) {
+ markOverdefined(&IVI);
+ return;
+ }
+ const Value *V = *TmpIVI->use_begin();
+ if (isa<ReturnInst>(V))
+ break;
+ TmpIVI = dyn_cast<InsertValueInst>(V);
+ if (!TmpIVI) {
+ markOverdefined(&IVI);
+ return;
}
}
+
+ // See if we are tracking the result of the callee.
+ Function *F = IVI.getParent()->getParent();
+ std::map<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ It = TrackedMultipleRetVals.find(std::make_pair(F, *IVI.idx_begin()));
+
+ // Merge in the inserted member value.
+ if (It != TrackedMultipleRetVals.end())
+ mergeInValue(It->second, F, getValueState(Val));
+
+ // Mark the aggregate result of the IVI overdefined; any tracking that we do
+ // will be done on the individual member values.
+ markOverdefined(&IVI);
}
void SCCPSolver::visitSelectInst(SelectInst &I) {
@@ -1127,79 +1206,101 @@
void SCCPSolver::visitCallSite(CallSite CS) {
Function *F = CS.getCalledFunction();
-
- DenseMap<Function*, LatticeVal>::iterator TFRVI =TrackedRetVals.end();
- // If we are tracking this function, we must make sure to bind arguments as
- // appropriate.
- bool FirstCall = false;
- if (F && F->hasInternalLinkage()) {
- TFRVI = TrackedRetVals.find(F);
- if (TFRVI != TrackedRetVals.end())
- FirstCall = true;
- else {
- std::multimap<Function*, LatticeValIndexed>::iterator It, E;
- tie(It, E) = TrackedMultipleRetVals.equal_range(F);
- if (It != E)
- FirstCall = true;
- }
- }
-
- if (FirstCall) {
- // If this is the first call to the function hit, mark its entry block
- // executable.
- if (!BBExecutable.count(F->begin()))
- MarkBlockExecutable(F->begin());
+ Instruction *I = CS.getInstruction();
+
+ // The common case is that we aren't tracking the callee, either because we
+ // are not doing interprocedural analysis or the callee is indirect, or is
+ // external. Handle these cases first.
+ if (F == 0 || !F->hasInternalLinkage()) {
+CallOverdefined:
+ // Void return and not tracking callee, just bail.
+ if (I->getType() == Type::VoidTy) return;
- CallSite::arg_iterator CAI = CS.arg_begin();
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI, ++CAI) {
- LatticeVal &IV = ValueState[AI];
- if (!IV.isOverdefined())
- mergeInValue(IV, AI, getValueState(*CAI));
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
+ if (!isa<StructType>(I->getType()) && F && F->isDeclaration() &&
+ canConstantFoldCallTo(F)) {
+
+ SmallVector<Constant*, 8> Operands;
+ for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
+ AI != E; ++AI) {
+ LatticeVal &State = getValueState(*AI);
+ if (State.isUndefined())
+ return; // Operands are not resolved yet.
+ else if (State.isOverdefined()) {
+ markOverdefined(I);
+ return;
+ }
+ assert(State.isConstant() && "Unknown state!");
+ Operands.push_back(State.getConstant());
+ }
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(F, &Operands[0], Operands.size())) {
+ markConstant(I, C);
+ return;
+ }
}
- }
- Instruction *I = CS.getInstruction();
-
- if (!CS.doesNotThrow() && I->getParent()->getUnwindDest())
- markEdgeExecutable(I->getParent(), I->getParent()->getUnwindDest());
-
- if (I->getType() == Type::VoidTy) return;
-
- LatticeVal &IV = ValueState[I];
- if (IV.isOverdefined()) return;
-
- // Propagate the single return value of the function to the value of the
- // instruction.
- if (TFRVI != TrackedRetVals.end()) {
- mergeInValue(IV, I, TFRVI->second);
- return;
- }
- if (F == 0 || !F->isDeclaration() || !canConstantFoldCallTo(F)) {
- markOverdefined(IV, I);
+ // Otherwise, we don't know anything about this call, mark it overdefined.
+ markOverdefined(I);
return;
}
- SmallVector<Constant*, 8> Operands;
- Operands.reserve(I->getNumOperands()-1);
-
- for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
- AI != E; ++AI) {
- LatticeVal &State = getValueState(*AI);
- if (State.isUndefined())
- return; // Operands are not resolved yet...
- else if (State.isOverdefined()) {
- markOverdefined(IV, I);
- return;
+ // If this is a single/zero retval case, see if we're tracking the function.
+ DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(I, TFRVI->second);
+ } else if (isa<StructType>(I->getType())) {
+ // Check to see if we're tracking this callee, if not, handle it in the
+ // common path above.
+ std::map<std::pair<Function*, unsigned>, LatticeVal>::iterator
+ TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0));
+ if (TMRVI == TrackedMultipleRetVals.end())
+ goto CallOverdefined;
+
+ // If we are tracking this callee, propagate the return values of the call
+ // into this call site. We do this by walking all the uses. Single-index
+ // ExtractValueInst uses can be tracked; anything more complicated is
+ // currently handled conservatively.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ if (GetResultInst *GRI = dyn_cast<GetResultInst>(*UI)) {
+ mergeInValue(GRI,
+ TrackedMultipleRetVals[std::make_pair(F, GRI->getIndex())]);
+ continue;
+ }
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(*UI)) {
+ if (EVI->getNumIndices() == 1) {
+ mergeInValue(EVI,
+ TrackedMultipleRetVals[std::make_pair(F, *EVI->idx_begin())]);
+ continue;
+ }
+ }
+ // The aggregate value is used in a way not handled here. Assume nothing.
+ markOverdefined(*UI);
}
- assert(State.isConstant() && "Unknown state!");
- Operands.push_back(State.getConstant());
+ } else {
+ // Otherwise we're not tracking this callee, so handle it in the
+ // common path above.
+ goto CallOverdefined;
+ }
+
+ // Finally, if this is the first call to the function hit, mark its entry
+ // block executable.
+ if (!BBExecutable.count(F->begin()))
+ MarkBlockExecutable(F->begin());
+
+ // Propagate information from this call site into the callee.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+ AI != E; ++AI, ++CAI) {
+ LatticeVal &IV = ValueState[AI];
+ if (!IV.isOverdefined())
+ mergeInValue(IV, AI, getValueState(*CAI));
}
-
- if (Constant *C = ConstantFoldCall(F, &Operands[0], Operands.size()))
- markConstant(IV, I, C);
- else
- markOverdefined(IV, I);
}
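
The constant-folding arm above evaluates a known library callee at compile
time once every argument has resolved to a constant. The shape of that check,
sketched standalone with fabs standing in for a foldable callee:

#include <cmath>

struct Lat { bool IsConstant; double C; };

// Returns true and sets Out when the call can be folded away.
static bool tryFoldFabsCall(const Lat &Arg, double &Out) {
  if (!Arg.IsConstant)
    return false;            // operand undefined or overdefined: no fold
  Out = std::fabs(Arg.C);    // evaluate the callee now, at compile time
  return true;
}
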
@@ -1380,6 +1481,12 @@
else
markOverdefined(LV, I);
return true;
+ case Instruction::Call:
+ // If a call has an undef result, it is because it is constant foldable
+ // but one of the inputs was undef. Just force the result to
+ // overdefined.
+ markOverdefined(LV, I);
+ return true;
}
}
@@ -1389,6 +1496,8 @@
if (!getValueState(BI->getCondition()).isUndefined())
continue;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      if (SI->getNumSuccessors() < 2)  // no cases
+ continue;
if (!getValueState(SI->getCondition()).isUndefined())
continue;
} else {
@@ -1445,11 +1554,11 @@
AU.setPreservesCFG();
}
};
-
- char SCCP::ID = 0;
- RegisterPass<SCCP> X("sccp", "Sparse Conditional Constant Propagation");
} // end anonymous namespace
+char SCCP::ID = 0;
+static RegisterPass<SCCP>
+X("sccp", "Sparse Conditional Constant Propagation");
// createSCCPPass - This is the public interface to this file...
FunctionPass *llvm::createSCCPPass() {
@@ -1461,7 +1570,7 @@
// and return true if the function was modified.
//
bool SCCP::runOnFunction(Function &F) {
- DOUT << "SCCP on function '" << F.getName() << "'\n";
+ DOUT << "SCCP on function '" << F.getNameStart() << "'\n";
SCCPSolver Solver;
// Mark the first block of the function as being executable.
@@ -1514,25 +1623,28 @@
//
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Inst->getType() != Type::VoidTy) {
- LatticeVal &IV = Values[Inst];
- if ((IV.isConstant() || IV.isUndefined()) &&
- !isa<TerminatorInst>(Inst)) {
- Constant *Const = IV.isConstant()
- ? IV.getConstant() : UndefValue::get(Inst->getType());
- DOUT << " Constant: " << *Const << " = " << *Inst;
-
- // Replaces all of the uses of a variable with uses of the constant.
- Inst->replaceAllUsesWith(Const);
-
- // Delete the instruction.
- BB->getInstList().erase(Inst);
-
- // Hey, we just changed something!
- MadeChanges = true;
- ++NumInstRemoved;
- }
- }
+ if (Inst->getType() == Type::VoidTy ||
+ isa<StructType>(Inst->getType()) ||
+ isa<TerminatorInst>(Inst))
+ continue;
+
+ LatticeVal &IV = Values[Inst];
+ if (!IV.isConstant() && !IV.isUndefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DOUT << " Constant: " << *Const << " = " << *Inst;
+
+ // Replaces all of the uses of a variable with uses of the constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++NumInstRemoved;
}
}
@@ -1550,12 +1662,12 @@
IPSCCP() : ModulePass((intptr_t)&ID) {}
bool runOnModule(Module &M);
};
-
- char IPSCCP::ID = 0;
- RegisterPass<IPSCCP>
- Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation");
} // end anonymous namespace
+char IPSCCP::ID = 0;
+static RegisterPass<IPSCCP>
+Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation");
+
// createIPSCCPPass - This is the public interface to this file...
ModulePass *llvm::createIPSCCPPass() {
return new IPSCCP();
@@ -1688,27 +1800,30 @@
} else {
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Inst->getType() != Type::VoidTy) {
- LatticeVal &IV = Values[Inst];
- if (IV.isConstant() ||
- (IV.isUndefined() && !isa<TerminatorInst>(Inst))) {
- Constant *Const = IV.isConstant()
- ? IV.getConstant() : UndefValue::get(Inst->getType());
- DOUT << " Constant: " << *Const << " = " << *Inst;
-
- // Replaces all of the uses of a variable with uses of the
- // constant.
- Inst->replaceAllUsesWith(Const);
-
- // Delete the instruction.
- if (!isa<TerminatorInst>(Inst) && !isa<CallInst>(Inst))
- BB->getInstList().erase(Inst);
-
- // Hey, we just changed something!
- MadeChanges = true;
- ++IPNumInstRemoved;
- }
- }
+ if (Inst->getType() == Type::VoidTy ||
+ isa<StructType>(Inst->getType()) ||
+ isa<TerminatorInst>(Inst))
+ continue;
+
+ LatticeVal &IV = Values[Inst];
+ if (!IV.isConstant() && !IV.isUndefined())
+ continue;
+
+ Constant *Const = IV.isConstant()
+ ? IV.getConstant() : UndefValue::get(Inst->getType());
+ DOUT << " Constant: " << *Const << " = " << *Inst;
+
+ // Replaces all of the uses of a variable with uses of the
+ // constant.
+ Inst->replaceAllUsesWith(Const);
+
+ // Delete the instruction.
+ if (!isa<CallInst>(Inst))
+ Inst->eraseFromParent();
+
+ // Hey, we just changed something!
+ MadeChanges = true;
+ ++IPNumInstRemoved;
}
}
@@ -1719,11 +1834,6 @@
// If there are any PHI nodes in this successor, drop entries for BB now.
BasicBlock *DeadBB = BlocksToErase[i];
while (!DeadBB->use_empty()) {
- if (BasicBlock *PredBB = dyn_cast<BasicBlock>(DeadBB->use_back())) {
- PredBB->setUnwindDest(NULL);
- continue;
- }
-
Instruction *I = cast<Instruction>(DeadBB->use_back());
bool Folded = ConstantFoldTerminator(I->getParent());
if (!Folded) {
@@ -1783,7 +1893,7 @@
GlobalVariable *GV = I->first;
assert(!I->second.isOverdefined() &&
"Overdefined values should have been taken out of the map!");
- DOUT << "Found that GV '" << GV->getName()<< "' is constant!\n";
+ DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n";
while (!GV->use_empty()) {
StoreInst *SI = cast<StoreInst>(GV->use_back());
SI->eraseFromParent();
Modified: llvm/branches/non-call-eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=53163&r1=53162&r2=53163&view=diff
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/ScalarReplAggregates.cpp Sun Jul 6 15:45:41 2008
@@ -124,11 +124,11 @@
unsigned Offset);
static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
};
-
- char SROA::ID = 0;
- RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
}
+char SROA::ID = 0;
+static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
+
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
return new SROA(Threshold);
@@ -178,6 +178,14 @@
return Changed;
}
+/// getNumSAElements - Return the number of elements in the specific struct or
+/// array.
+static uint64_t getNumSAElements(const Type *T) {
+ if (const StructType *ST = dyn_cast<StructType>(T))
+ return ST->getNumElements();
+ return cast<ArrayType>(T)->getNumElements();
+}
+
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the malloc/alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
@@ -224,7 +232,10 @@
(isa<StructType>(AI->getAllocatedType()) ||
isa<ArrayType>(AI->getAllocatedType())) &&
AI->getAllocatedType()->isSized() &&
- TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold) {
+        // Do not promote any struct whose size is larger than SRThreshold (128 by default) bytes.
+ TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold &&
+        // Do not promote any struct into more than SRThreshold/4 (32 by default) separate vars.
+ getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) {
// Check that all of the users of the allocation are capable of being
// transformed.
switch (isSafeAllocaToScalarRepl(AI)) {
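
The two new guards combine into one small heuristic; with the default
threshold of 128 implied by the comments above, SROA admits aggregates under
128 bytes that split into fewer than 32 scalars. A standalone restatement
(hypothetical helper, not code from the pass):

static bool shouldScalarize(unsigned TypeSizeBytes, unsigned NumElements,
                            unsigned SRThreshold = 128) {
  return TypeSizeBytes < SRThreshold &&   // total size bound
         NumElements < SRThreshold / 4;   // element count bound
}
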
@@ -302,6 +313,43 @@
continue;
}
+ // Replace:
+ // %res = load { i32, i32 }* %alloc
+ // with:
+ // %load.0 = load i32* %alloc.0
+      //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
+ // %load.1 = load i32* %alloc.1
+ // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+ // (Also works for arrays instead of structs)
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ Value *Insert = UndefValue::get(LI->getType());
+ for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+ Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
+ Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+ }
+ LI->replaceAllUsesWith(Insert);
+ LI->eraseFromParent();
+ continue;
+ }
+
+ // Replace:
+ // store { i32, i32 } %val, { i32, i32 }* %alloc
+ // with:
+ // %val.0 = extractvalue { i32, i32 } %val, 0
+ // store i32 %val.0, i32* %alloc.0
+ // %val.1 = extractvalue { i32, i32 } %val, 1
+ // store i32 %val.1, i32* %alloc.1
+ // (Also works for arrays instead of structs)
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ Value *Val = SI->getOperand(0);
+ for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+ Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
+ new StoreInst(Extract, ElementAllocas[i], SI);
+ }
+ SI->eraseFromParent();
+ continue;
+ }
+
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
// We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
unsigned Idx =
@@ -440,6 +488,12 @@
if (BitCastInst *C = dyn_cast<BitCastInst>(User))
return isSafeUseOfBitCastedAllocation(C, AI, Info);
+ if (isa<LoadInst>(User))
+      return; // Loads (returning a first class aggregate) are always rewritable
+
+ if (isa<StoreInst>(User) && User->getOperand(0) != AI)
+ return; // Store is ok if storing INTO the pointer, not storing the pointer
+
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
if (GEPI == 0)
return MarkUnsafe(Info);
@@ -631,11 +685,9 @@
// If this is a memcpy/memmove, emit a GEP of the other element address.
Value *OtherElt = 0;
if (OtherPtr) {
- Value *Idx[2];
- Idx[0] = Zero;
- Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+ Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
- OtherPtr->getNameStr()+"."+utostr(i),
+ OtherPtr->getNameStr()+"."+utostr(i),
MI);
}
@@ -643,7 +695,7 @@
const Type *EltTy =cast<PointerType>(EltPtr->getType())->getElementType();
// If we got down to a scalar, insert a load or store as appropriate.
- if (EltTy->isFirstClassType()) {
+ if (EltTy->isSingleValueType()) {
if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp",
MI);
@@ -737,8 +789,7 @@
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
-static bool HasPadding(const Type *Ty, const TargetData &TD,
- bool inPacked = false) {
+static bool HasPadding(const Type *Ty, const TargetData &TD) {
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
@@ -746,7 +797,7 @@
unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
// Padding in sub-elements?
- if (HasPadding(STy->getElementType(i), TD, STy->isPacked()))
+ if (HasPadding(STy->getElementType(i), TD))
return true;
// Check to see if there is any padding between this element and the
@@ -770,12 +821,11 @@
}
} else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- return HasPadding(ATy->getElementType(), TD, false);
+ return HasPadding(ATy->getElementType(), TD);
} else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- return HasPadding(VTy->getElementType(), TD, false);
+ return HasPadding(VTy->getElementType(), TD);
}
- return inPacked ?
- false : TD.getTypeSizeInBits(Ty) != TD.getABITypeSizeInBits(Ty);
+ return TD.getTypeSizeInBits(Ty) != TD.getABITypeSizeInBits(Ty);
}
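
The same size-versus-layout comparison can be observed on host types: sizeof
includes alignment padding, so it exceeds the sum of the field sizes exactly
when padding exists. Illustrative only, and assuming a typical ABI where int
is 4-byte aligned:

struct PaddedExample   { char C; int I; };     // 3 padding bytes after C
struct UnpaddedExample { int I; char C[4]; };  // fields tile exactly

static_assert(sizeof(PaddedExample) > sizeof(char) + sizeof(int),
              "padding present");
static_assert(sizeof(UnpaddedExample) == sizeof(int) + 4,
              "no padding");
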
/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
@@ -963,12 +1013,22 @@
Instruction *User = cast<Instruction>(*UI);
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+    // FIXME: Loads of a first class aggregate value could be converted to a
+ // series of loads and insertvalues
+ if (!LI->getType()->isSingleValueType())
+ return 0;
+
if (MergeInType(LI->getType(), UsedType, TD))
return 0;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V) return 0;
+
+    // FIXME: Stores of a first class aggregate value could be converted to a
+ // series of extractvalues and stores
+ if (!SI->getOperand(0)->getType()->isSingleValueType())
+ return 0;
// NOTE: We could handle storing of FP imms into integers here!
@@ -1204,11 +1264,11 @@
// We do this to support (f.e.) loads off the end of a structure where
// only some bits are used.
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
- NV = BinaryOperator::createLShr(NV,
+ NV = BinaryOperator::CreateLShr(NV,
ConstantInt::get(NV->getType(),ShAmt),
LI->getName(), LI);
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
- NV = BinaryOperator::createShl(NV,
+ NV = BinaryOperator::CreateShl(NV,
ConstantInt::get(NV->getType(),-ShAmt),
LI->getName(), LI);
@@ -1308,12 +1368,12 @@
// only some bits in the structure are set.
APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = BinaryOperator::createShl(SV,
+ SV = BinaryOperator::CreateShl(SV,
ConstantInt::get(SV->getType(), ShAmt),
SV->getName(), SI);
Mask <<= ShAmt;
} else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = BinaryOperator::createLShr(SV,
+ SV = BinaryOperator::CreateLShr(SV,
ConstantInt::get(SV->getType(),-ShAmt),
SV->getName(), SI);
Mask = Mask.lshr(ShAmt);
@@ -1323,9 +1383,9 @@
// in the new bits.
if (SrcWidth != DestWidth) {
assert(DestWidth > SrcWidth);
- Old = BinaryOperator::createAnd(Old, ConstantInt::get(~Mask),
+ Old = BinaryOperator::CreateAnd(Old, ConstantInt::get(~Mask),
Old->getName()+".mask", SI);
- SV = BinaryOperator::createOr(Old, SV, SV->getName()+".ins", SI);
+ SV = BinaryOperator::CreateOr(Old, SV, SV->getName()+".ins", SI);
}
}
return SV;
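The shl/and/or sequence built above is the standard way to splice a narrow value into a bit range of a wider one. A minimal sketch of the same arithmetic on host integers (illustrative names, not part of the patch):

  #include <cstdint>
  #include <cstdio>

  // Place the low SrcWidth bits of SV at bit ShAmt of Old, mirroring the
  // CreateShl / CreateAnd(~Mask) / CreateOr sequence in the pass.
  static uint32_t InsertBits(uint32_t Old, uint32_t SV,
                             unsigned SrcWidth, unsigned ShAmt) {
    uint32_t Mask = ((SrcWidth == 32 ? 0u : 1u << SrcWidth) - 1u) << ShAmt;
    SV <<= ShAmt;          // CreateShl
    Old &= ~Mask;          // CreateAnd: clear the destination bit range
    return Old | SV;       // CreateOr: insert the new bits
  }

  int main() {
    // Insert the 8-bit value 0xAB at bit 16 of 0xFFFFFFFF -> 0xFFABFFFF.
    std::printf("0x%08X\n", InsertBits(0xFFFFFFFFu, 0xABu, 8, 16));
  }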
Removed: llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFG.cpp?rev=53162&view=auto
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFG.cpp (original)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFG.cpp (removed)
@@ -1,230 +0,0 @@
-//===- SimplifyCFG.cpp - CFG Simplification Pass --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements dead code elimination and basic block merging, along
-// with a collection of other peephole control flow optimizations. For example:
-//
-// * Removes basic blocks with no predecessors.
-// * Merges a basic block into its predecessor if there is only one and the
-// predecessor only has one successor.
-// * Eliminates PHI nodes for basic blocks with a single predecessor.
-// * Eliminates a basic block that only contains an unconditional branch.
-// * Changes invoke instructions to nounwind functions to be calls.
-// * Change things like "if (x) if (y)" into "if (x&y)".
-// * etc..
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "simplifycfg"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/ParameterAttributes.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumSimpl, "Number of blocks simplified");
-
-namespace {
- struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass((intptr_t)&ID) {}
-
- virtual bool runOnFunction(Function &F);
- };
- char CFGSimplifyPass::ID = 0;
- RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG");
-}
-
-// Public interface to the CFGSimplification pass
-FunctionPass *llvm::createCFGSimplificationPass() {
- return new CFGSimplifyPass();
-}
-
-/// ChangeToUnreachable - Insert an unreachable instruction before the specified
-/// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I) {
- BasicBlock *BB = I->getParent();
- // Loop over all of the successors, removing BB's entry from any PHI
- // nodes.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- (*SI)->removePredecessor(BB);
-
- new UnreachableInst(I);
-
- // All instructions after this are dead.
- BasicBlock::iterator BBI = I, BBE = BB->end();
- while (BBI != BBE) {
- if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
- BB->getInstList().erase(BBI++);
- }
-}
-
-/// ChangeToCall - Convert the specified invoke into a normal call.
-static void ChangeToCall(InvokeInst *II) {
- BasicBlock *BB = II->getParent();
- SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
- Args.end(), "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setParamAttrs(II->getParamAttrs());
- II->replaceAllUsesWith(NewCall);
-
- // Follow the call by a branch to the normal destination.
- BranchInst::Create(II->getNormalDest(), II);
-
- // Update PHI nodes in the unwind destination
- II->getUnwindDest()->removePredecessor(BB);
- BB->getInstList().erase(II);
-}
-
-static bool MarkAliveBlocks(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 128> &Reachable) {
-
- SmallVector<BasicBlock*, 128> Worklist;
- Worklist.push_back(BB);
- bool Changed = false;
- while (!Worklist.empty()) {
- BB = Worklist.back();
- Worklist.pop_back();
-
- if (!Reachable.insert(BB))
- continue;
-
- // Do a quick scan of the basic block, turning any obviously unreachable
- // instructions into LLVM unreachable insts. The instruction combining pass
- // canonicalizes unreachable insts into stores to null or undef.
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
- if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
- if (CI->doesNotReturn()) {
- // If we found a call to a no-return function, insert an unreachable
- // instruction after it. Make sure there isn't *already* one there
- // though.
- ++BBI;
- if (!isa<UnreachableInst>(BBI)) {
- ChangeToUnreachable(BBI);
- Changed = true;
- }
- break;
- }
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
- if (isa<ConstantPointerNull>(SI->getOperand(1)) ||
- isa<UndefValue>(SI->getOperand(1))) {
- ChangeToUnreachable(SI);
- Changed = true;
- break;
- }
- }
-
- // Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow()) {
- ChangeToCall(II);
- Changed = true;
- }
-
- Changed |= ConstantFoldTerminator(BB);
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- Worklist.push_back(*SI);
- }
- return Changed;
-}
-
-/// RemoveUnreachableBlocks - Remove blocks that are not reachable, even if they
-/// are in a dead cycle. Return true if a change was made, false otherwise.
-static bool RemoveUnreachableBlocks(Function &F) {
- SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = MarkAliveBlocks(F.begin(), Reachable);
-
- // If there are unreachable blocks in the CFG...
- if (Reachable.size() == F.size())
- return Changed;
-
- assert(Reachable.size() < F.size());
- NumSimpl += F.size()-Reachable.size();
-
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references...
- for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
- continue;
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.count(*SI))
- (*SI)->removePredecessor(BB);
- BB->dropAllReferences();
- }
-
- for (Function::iterator I = ++F.begin(); I != F.end();)
- if (!Reachable.count(I))
- I = F.getBasicBlockList().erase(I);
- else
- ++I;
-
- return true;
-}
-
-/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
-/// iterating until no more changes are made.
-static bool IterativeSimplifyCFG(Function &F) {
- bool Changed = false;
- bool LocalChange = true;
- while (LocalChange) {
- LocalChange = false;
-
- // Loop over all of the basic blocks (except the first one) and remove them
- // if they are unneeded...
- //
- for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++)) {
- LocalChange = true;
- ++NumSimpl;
- }
- }
- Changed |= LocalChange;
- }
- return Changed;
-}
-
-// It is possible that we may require multiple passes over the code to fully
-// simplify the CFG.
-//
-bool CFGSimplifyPass::runOnFunction(Function &F) {
- bool EverChanged = RemoveUnreachableBlocks(F);
- EverChanged |= IterativeSimplifyCFG(F);
-
- // If neither pass changed anything, we're done.
- if (!EverChanged) return false;
-
- // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
- // RemoveUnreachableBlocks is needed to nuke them, which means we should
- // iterate between the two optimizations. We structure the code like this to
- // avoid rerunning IterativeSimplifyCFG if the second pass of
- // RemoveUnreachableBlocks doesn't do anything.
- if (!RemoveUnreachableBlocks(F))
- return true;
-
- do {
- EverChanged = IterativeSimplifyCFG(F);
- EverChanged |= RemoveUnreachableBlocks(F);
- } while (EverChanged);
-
- return true;
-}
Added: llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp?rev=53163&view=auto
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp (added)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyCFGPass.cpp Sun Jul 6 15:45:41 2008
@@ -0,0 +1,231 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations. For example:
+//
+// * Removes basic blocks with no predecessors.
+// * Merges a basic block into its predecessor if there is only one and the
+// predecessor only has one successor.
+// * Eliminates PHI nodes for basic blocks with a single predecessor.
+// * Eliminates a basic block that only contains an unconditional branch.
+// * Changes invoke instructions to nounwind functions to be calls.
+// * Changes things like "if (x) if (y)" into "if (x&y)".
+// * etc..
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/ParameterAttributes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+namespace {
+ struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CFGSimplifyPass() : FunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+char CFGSimplifyPass::ID = 0;
+static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG");
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass() {
+ return new CFGSimplifyPass();
+}
+
+/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void ChangeToUnreachable(Instruction *I) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ new UnreachableInst(I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// ChangeToCall - Convert the specified invoke into a normal call.
+static void ChangeToCall(InvokeInst *II) {
+ BasicBlock *BB = II->getParent();
+ SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
+ Args.end(), "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setParamAttrs(II->getParamAttrs());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(BB);
+ BB->getInstList().erase(II);
+}
+
+static bool MarkAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(BB);
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ BB = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Reachable.insert(BB))
+ continue;
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ ChangeToUnreachable(BBI);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (isa<ConstantPointerNull>(SI->getOperand(1)) ||
+ isa<UndefValue>(SI->getOperand(1))) {
+ ChangeToUnreachable(SI);
+ Changed = true;
+ break;
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ if (II->doesNotThrow()) {
+ ChangeToCall(II);
+ Changed = true;
+ }
+
+ Changed |= ConstantFoldTerminator(BB);
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ Worklist.push_back(*SI);
+ }
+ return Changed;
+}
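The traversal above is a plain depth-first worklist walk with a visited set. The same skeleton over a toy successor graph, for reference (illustrative, not part of the patch):

  #include <cstdio>
  #include <set>
  #include <vector>

  int main() {
    // Successor lists for blocks 0..3; block 3 is unreachable from block 0.
    std::vector<std::vector<int>> Succs = {{1, 2}, {2}, {0}, {1}};
    std::set<int> Reachable;
    std::vector<int> Worklist{0};
    while (!Worklist.empty()) {
      int BB = Worklist.back();
      Worklist.pop_back();
      if (!Reachable.insert(BB).second)   // SmallPtrSet::insert analogue
        continue;
      for (int S : Succs[BB])
        Worklist.push_back(S);
    }
    std::printf("reachable blocks: %zu of %zu\n", Reachable.size(), Succs.size());
  }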
+
+/// RemoveUnreachableBlocks - Remove blocks that are not reachable, even if they
+/// are in a dead cycle. Return true if a change was made, false otherwise.
+static bool RemoveUnreachableBlocks(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+
+ // If there are unreachable blocks in the CFG...
+ if (Reachable.size() == F.size())
+ return Changed;
+
+ assert(Reachable.size() < F.size());
+ NumSimpl += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
+ continue;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ for (Function::iterator I = ++F.begin(); I != F.end();)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
+
+/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool IterativeSimplifyCFG(Function &F) {
+ bool Changed = false;
+ bool LocalChange = true;
+ while (LocalChange) {
+ LocalChange = false;
+
+ // Loop over all of the basic blocks (except the first one) and remove them
+ // if they are unneeded...
+ //
+ for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+ if (SimplifyCFG(BBIt++)) {
+ LocalChange = true;
+ ++NumSimpl;
+ }
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
+
+// It is possible that we may require multiple passes over the code to fully
+// simplify the CFG.
+//
+bool CFGSimplifyPass::runOnFunction(Function &F) {
+ bool EverChanged = RemoveUnreachableBlocks(F);
+ EverChanged |= IterativeSimplifyCFG(F);
+
+ // If neither pass changed anything, we're done.
+ if (!EverChanged) return false;
+
+ // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
+ // RemoveUnreachableBlocks is needed to nuke them, which means we should
+ // iterate between the two optimizations. We structure the code like this to
+ // avoid rerunning IterativeSimplifyCFG if the second pass of
+ // RemoveUnreachableBlocks doesn't do anything.
+ if (!RemoveUnreachableBlocks(F))
+ return true;
+
+ do {
+ EverChanged = IterativeSimplifyCFG(F);
+ EverChanged |= RemoveUnreachableBlocks(F);
+ } while (EverChanged);
+
+ return true;
+}
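The control flow of runOnFunction is a fixed-point loop over two mutually enabling cleanups. A stripped-down sketch of that driver shape, with stand-in cleanups rather than the pass code:

  #include <cstdio>

  static int State = 5;  // stand-in for "how much is left to clean up"
  static bool CleanupA() { if (State > 2) { --State; return true; } return false; }
  static bool CleanupB() { if (State == 2) { --State; return true; } return false; }

  int main() {
    bool Changed;
    do {
      // Run both cleanups each round: A can expose work for B and vice versa.
      Changed  = CleanupA();
      Changed |= CleanupB();
    } while (Changed);
    std::printf("fixed point reached, State = %d\n", State);  // prints 1
  }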
Added: llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=53163&view=auto
==============================================================================
--- llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp (added)
+++ llvm/branches/non-call-eh/lib/Transforms/Scalar/SimplifyLibCalls.cpp Sun Jul 6 15:45:41 2008
@@ -0,0 +1,1444 @@
+//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple pass that applies a variety of small
+// optimizations for calls to specific well-known function calls (e.g. runtime
+// library functions). For example, a call to the function "exit(3)" that
+// occurs within the main() function can be transformed into a simple "return 3"
+// instruction. Any optimization that takes this form (replace call to library
+// function with simpler code that provides the same result) belongs in this
+// file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
+
+//===----------------------------------------------------------------------===//
+// Optimizer Base Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the abstract base class for the set of optimizations; each
+/// subclass handles one particular library call.
+namespace {
+class VISIBILITY_HIDDEN LibCallOptimization {
+protected:
+ Function *Caller;
+ const TargetData *TD;
+public:
+ LibCallOptimization() { }
+ virtual ~LibCallOptimization() {}
+
+ /// CallOptimizer - This pure virtual method is implemented by subclasses to
+ /// do various optimizations. If this returns null then no transformation was
+ /// performed. If it returns CI, then it transformed the call and CI is to be
+ /// deleted. If it returns something else, replace CI with the new value and
+ /// delete CI.
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) =0;
+
+ Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder &B) {
+ Caller = CI->getParent()->getParent();
+ this->TD = &TD;
+ return CallOptimizer(CI->getCalledFunction(), CI, B);
+ }
+
+ /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+ Value *CastToCStr(Value *V, IRBuilder &B);
+
+ /// EmitStrLen - Emit a call to the strlen function to the builder, for the
+ /// specified pointer. Ptr is required to be some pointer type, and the
+ /// return value has 'intptr_t' type.
+ Value *EmitStrLen(Value *Ptr, IRBuilder &B);
+
+ /// EmitMemCpy - Emit a call to the memcpy function to the builder. This
+ /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+ Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder &B);
+
+ /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+ /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+ Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder &B);
+
+ /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+ /// 'floor'). This function is known to take a single argument of type matching
+ /// 'Op' and to return one value of the same type. If 'Op' is a long double, an
+ /// 'l' suffix is added to the name; if 'Op' is a float, an 'f' suffix is added.
+ Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder &B);
+
+ /// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+ /// is an integer.
+ void EmitPutChar(Value *Char, IRBuilder &B);
+
+ /// EmitPutS - Emit a call to the puts function. This assumes that Str is
+ /// some pointer.
+ void EmitPutS(Value *Str, IRBuilder &B);
+
+ /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+ /// an i32, and File is a pointer to FILE.
+ void EmitFPutC(Value *Char, Value *File, IRBuilder &B);
+
+ /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+ /// pointer and File is a pointer to FILE.
+ void EmitFPutS(Value *Str, Value *File, IRBuilder &B);
+
+ /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+ /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+ void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder &B);
+
+};
+} // End anonymous namespace.
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder &B) {
+ return B.CreateBitCast(V, PointerType::getUnqual(Type::Int8Ty), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Constant *StrLen =M->getOrInsertFunction("strlen", TD->getIntPtrType(),
+ PointerType::getUnqual(Type::Int8Ty),
+ NULL);
+ return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+}
+
+/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
+/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
+Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Intrinsic::ID IID = Len->getType() == Type::Int32Ty ?
+ Intrinsic::memcpy_i32 : Intrinsic::memcpy_i64;
+ Value *MemCpy = Intrinsic::getDeclaration(M, IID);
+ return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len,
+ ConstantInt::get(Type::Int32Ty, Align));
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Value *MemChr = M->getOrInsertFunction("memchr",
+ PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ Type::Int32Ty, TD->getIntPtrType(),
+ NULL);
+ return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). This function is known to take a single argument of type matching
+/// 'Op' and to return one value of the same type. If 'Op' is a long double, an
+/// 'l' suffix is added to the name; if 'Op' is a float, an 'f' suffix is added.
+Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+ IRBuilder &B) {
+ char NameBuffer[20];
+ if (Op->getType() != Type::DoubleTy) {
+ // If we need to add a suffix, copy into NameBuffer.
+ unsigned NameLen = strlen(Name);
+ assert(NameLen < sizeof(NameBuffer)-2);
+ memcpy(NameBuffer, Name, NameLen);
+ if (Op->getType() == Type::FloatTy)
+ NameBuffer[NameLen] = 'f'; // floorf
+ else
+ NameBuffer[NameLen] = 'l'; // floorl
+ NameBuffer[NameLen+1] = 0;
+ Name = NameBuffer;
+ }
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ return B.CreateCall(Callee, Op, Name);
+}
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty,
+ Type::Int32Ty, NULL);
+ B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar");
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+void LibCallOptimization::EmitPutS(Value *Str, IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Value *F = M->getOrInsertFunction("puts", Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty), NULL);
+ B.CreateCall(F, CastToCStr(Str, B), "puts");
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Constant *F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
+ B.CreateCall2(F, Char, File, "fputc");
+}
+
+/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+/// pointer and File is a pointer to FILE.
+void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Constant *F = M->getOrInsertFunction("fputs", Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ File->getType(), NULL);
+ B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder &B) {
+ Module *M = Caller->getParent();
+ Constant *F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD->getIntPtrType(), TD->getIntPtrType(),
+ File->getType(), NULL);
+ B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(), 1), File);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+ // Look through noop bitcast instructions.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN))
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+ // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+ // If the value is not a GEP instruction nor a constant expression with a
+ // GEP instruction, then return unknown.
+ User *GEP = 0;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return 0;
+ GEP = CE;
+ } else {
+ return 0;
+ }
+
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return 0;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+ if (!Idx->isZero())
+ return 0;
+ } else
+ return 0;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return 0;
+
+ // The GEP instruction, constant or instruction, must reference a global
+ // variable that is a constant and is initialized. The referenced constant
+ // initializer is the array that we'll use for optimization.
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasInitializer())
+ return 0;
+ Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case, which is a degenerate case. The
+ // initializer is constant zero so the length of the string must be zero.
+ if (isa<ConstantAggregateZero>(GlobalInit))
+ return 1; // Len = 0 offset by 1.
+
+ // Must be a Constant Array
+ ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ if (!Array || Array->getType()->getElementType() != Type::Int8Ty)
+ return 0;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ // Traverse the constant array from StartIdx (derived above) which is
+ // the place the GEP refers to in the array.
+ for (unsigned i = StartIdx; i != NumElts; ++i) {
+ Constant *Elt = Array->getOperand(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return 0;
+ if (CI->isZero())
+ return i-StartIdx+1; // We found end of string, success!
+ }
+
+ return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
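The final loop reduces to a nul scan over a constant byte array starting at the GEP's index, with the result biased by one. A host-side sketch (illustrative names, not part of the patch):

  #include <cstdint>
  #include <cstdio>

  // Scan a constant i8 array from StartIdx; return len+1 at a nul terminator,
  // or 0 (unknown) if the array is not nul terminated.
  static uint64_t StringLengthFrom(const char *Array, uint64_t NumElts,
                                   uint64_t StartIdx) {
    for (uint64_t i = StartIdx; i != NumElts; ++i)
      if (Array[i] == 0)
        return i - StartIdx + 1;   // length biased by one, as in the pass
    return 0;
  }

  int main() {
    const char Init[8] = {'h', 'e', 'l', 'l', 'o', 0, 'x', 'y'};
    std::printf("%llu\n", (unsigned long long)StringLengthFrom(Init, 8, 0)); // 6
    std::printf("%llu\n", (unsigned long long)StringLengthFrom(Init, 8, 2)); // 4
  }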
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLength(Value *V) {
+ if (!isa<PointerType>(V->getType())) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+ // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+ // the length of an empty string (1, counting the nul terminator).
+ return Len == ~0ULL ? 1 : Len;
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality())
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous LibCall Optimizations
+//===----------------------------------------------------------------------===//
+
+namespace {
+//===---------------------------------------===//
+// 'exit' Optimizations
+
+/// ExitOpt - int main() { exit(4); } --> int main() { return 4; }
+struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Verify we have a reasonable prototype for exit.
+ if (Callee->arg_size() == 0 || !CI->use_empty())
+ return 0;
+
+ // Verify the caller is main, and that the result type of main matches the
+ // argument type of exit.
+ if (!Caller->isName("main") || !Caller->hasExternalLinkage() ||
+ Caller->getReturnType() != CI->getOperand(1)->getType())
+ return 0;
+
+ TerminatorInst *OldTI = CI->getParent()->getTerminator();
+
+ // Create the return after the call.
+ ReturnInst *RI = B.CreateRet(CI->getOperand(1));
+
+ // Drop all successor phi node entries.
+ for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i)
+ OldTI->getSuccessor(i)->removePredecessor(CI->getParent());
+
+ // Erase all instructions from after our return instruction until the end of
+ // the block.
+ BasicBlock::iterator FirstDead = RI; ++FirstDead;
+ CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end());
+ return CI;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory LibCall Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'strcat' Optimizations
+
+struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Verify the "strcat" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return 0;
+
+ // Extract some information from the instruction
+ Value *Dst = CI->getOperand(1);
+ Value *Src = CI->getOperand(2);
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+ --Len; // Unbias length.
+
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
+
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B);
+
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string .. we're concatenating).
+ Dst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len+1), 1, B);
+ return Dst;
+ }
+};
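The transformation above relies on the identity strcat(dst, src) == memcpy(dst + strlen(dst), src, strlen(src) + 1), nul byte included. Checked directly on host buffers:

  #include <cstdio>
  #include <cstring>

  int main() {
    char Dst[16] = "foo";
    const char *Src = "bar";
    std::size_t DstLen = std::strlen(Dst);                 // EmitStrLen
    std::memcpy(Dst + DstLen, Src, std::strlen(Src) + 1);  // EmitMemCpy, Len+1 copies the nul
    std::printf("%s\n", Dst);                              // prints "foobar"
  }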
+
+//===---------------------------------------===//
+// 'strchr' Optimizations
+
+struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Verify the "strchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != PointerType::getUnqual(Type::Int8Ty) ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ Value *SrcStr = CI->getOperand(1);
+
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
+ if (CharC == 0) {
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || FT->getParamType(1) != Type::Int32Ty) // memchr needs i32.
+ return 0;
+
+ return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
+ ConstantInt::get(TD->getIntPtrType(), Len), B);
+ }
+
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str))
+ return 0;
+
+ // strchr can find the nul character.
+ Str += '\0';
+ char CharValue = CharC->getSExtValue();
+
+ // Compute the offset.
+ uint64_t i = 0;
+ while (1) {
+ if (i == Str.size()) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
+ // Did we find our match?
+ if (Str[i] == CharValue)
+ break;
+ ++i;
+ }
+
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ Value *Idx = ConstantInt::get(Type::Int64Ty, i);
+ return B.CreateGEP(SrcStr, Idx, "strchr");
+ }
+};
+
+//===---------------------------------------===//
+// 'strcmp' Optimizations
+
+struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Verify the "strcmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty))
+ return 0;
+
+ Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), strcmp(Str1.c_str(),Str2.c_str()));
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strncmp' Optimizations
+
+struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Verify the "strncmp" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+ !isa<IntegerType>(FT->getParamType(2)))
+ return 0;
+
+ Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ Length = LengthArg->getZExtValue();
+ else
+ return 0;
+
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
+
+ std::string Str1, Str2;
+ bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+ // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(),
+ strncmp(Str1.c_str(), Str2.c_str(), Length));
+ return 0;
+ }
+};
+
+
+//===---------------------------------------===//
+// 'strcpy' Optimizations
+
+struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Verify the "strcpy" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty))
+ return 0;
+
+ Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy that also copies the nul byte, with align = 1.
+ EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(), Len), 1, B);
+ return Dst;
+ }
+};
+
+
+
+//===---------------------------------------===//
+// 'strlen' Optimizations
+
+struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 ||
+ FT->getParamType(0) != PointerType::getUnqual(Type::Int8Ty) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ Value *Src = CI->getOperand(1);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len-1);
+
+ // Handle strlen(p) != 0.
+ if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0;
+
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'memcmp' Optimizations
+
+struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ FT->getReturnType() != Type::Int32Ty)
+ return 0;
+
+ Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!LenC) return 0;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS
+ Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
+ Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv");
+ return B.CreateZExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
+ }
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0
+ if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
+ const Type *PTy = PointerType::getUnqual(Len == 2 ?
+ Type::Int16Ty : Type::Int32Ty);
+ LHS = B.CreateBitCast(LHS, PTy, "tmp");
+ RHS = B.CreateBitCast(RHS, PTy, "tmp");
+ LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
+ LoadInst *RHSV = B.CreateLoad(RHS, "rhsv");
+ LHSV->setAlignment(1); RHSV->setAlignment(1); // Unaligned loads.
+ return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
+ }
+
+ return 0;
+ }
+};
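For the Len == 2 case, the equality-only rewrite can be checked on the host; memcpy below is the portable spelling of the unaligned 16-bit loads the pass emits (illustrative names, not part of the patch):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  static bool EqualViaMemcmp(const char *A, const char *B) {
    return std::memcmp(A, B, 2) == 0;
  }

  static bool EqualViaXor(const char *A, const char *B) {
    uint16_t LHSV, RHSV;
    std::memcpy(&LHSV, A, 2);   // unaligned 16-bit load of the left side
    std::memcpy(&RHSV, B, 2);   // unaligned 16-bit load of the right side
    return (LHSV ^ RHSV) == 0;  // only the ==0 / !=0 result is preserved
  }

  int main() {
    const char *X = "ab", *Y = "ab", *Z = "ac";
    std::printf("%d %d\n", EqualViaMemcmp(X, Y) == EqualViaXor(X, Y),
                           EqualViaMemcmp(X, Z) == EqualViaXor(X, Z)); // 1 1
  }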
+
+//===---------------------------------------===//
+// 'memcpy' Optimizations
+
+struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ FT->getParamType(2) != TD->getIntPtrType())
+ return 0;
+
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+ return CI->getOperand(1);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'pow*' Optimizations
+
+struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPoint())
+ return 0;
+
+ Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B);
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return 0;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // FIXME: This is not safe for -0.0 and -inf. This can only be done when
+ // 'unsafe' math optimizations are allowed.
+ // x pow(x, 0.5) sqrt(x)
+ // ---------------------------------------------
+ // -0.0 +0.0 -0.0
+ // -inf +inf NaN
+#if 0
+ // pow(x, 0.5) -> sqrt(x)
+ return B.CreateCall(get_sqrt(), Op1, "sqrt");
+#endif
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+ return 0;
+ }
+};
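The algebraic rewrites above can be spot-checked against libm; on a correctly rounding libm each comparison below prints 1, though the last ulp is not guaranteed by the C standard. The pow(x, 0.5) case is intentionally absent, matching the FIXME:

  #include <cmath>
  #include <cstdio>

  int main() {
    double X = 3.5;
    std::printf("%d\n", std::pow(1.0, X) == 1.0);        // pow(1.0, x) -> 1.0
    std::printf("%d\n", std::pow(2.0, X) == std::exp2(X)); // pow(2.0, x) -> exp2(x)
    std::printf("%d\n", std::pow(X, 0.0) == 1.0);        // pow(x, 0.0) -> 1.0
    std::printf("%d\n", std::pow(X, 1.0) == X);          // pow(x, 1.0) -> x
    std::printf("%d\n", std::pow(X, 2.0) == X * X);      // pow(x, 2.0) -> x*x
    std::printf("%d\n", std::pow(X, -1.0) == 1.0 / X);   // pow(x, -1.0) -> 1.0/x
  }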
+
+//===---------------------------------------===//
+// 'exp2' Optimizations
+
+struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPoint())
+ return 0;
+
+ Value *Op = CI->getOperand(1);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType() == Type::FloatTy)
+ Name = "ldexpf";
+ else if (Op->getType() == Type::DoubleTy)
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(APFloat(1.0f));
+ if (Op->getType() != Type::FloatTy)
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), Type::Int32Ty,NULL);
+ return B.CreateCall2(Callee, One, LdExpArg);
+ }
+ return 0;
+ }
+};
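For integer exponents both sides of the rewrite produce exact powers of two, so the equivalence is directly testable on the host:

  #include <cmath>
  #include <cstdio>

  int main() {
    // exp2(sitofp(n)) and ldexp(1.0, n) agree exactly for small integer n.
    for (int N = -4; N <= 4; ++N)
      if (std::exp2((double)N) != std::ldexp(1.0, N))
        std::printf("mismatch at %d\n", N);
    std::printf("done\n");   // expect no mismatches
  }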
+
+
+//===---------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
+ FT->getParamType(0) != Type::DoubleTy)
+ return 0;
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+ if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B);
+ return B.CreateFPExt(V, Type::DoubleTy);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'ffs*' Optimizations
+
+struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has one integer argument and an i32 result.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty ||
+ !isa<IntegerType>(FT->getParamType(0)))
+ return 0;
+
+ Value *Op = CI->getOperand(1);
+
+ // Constant fold.
+ if (ConstantInt *COp = dyn_cast<ConstantInt>(Op)) { // don't shadow CI
+ if (COp->getValue() == 0) // ffs(0) -> 0.
+ return Constant::getNullValue(CI->getType()); // i32, the call's type
+ return ConstantInt::get(Type::Int32Ty, // ffs(c) -> cttz(c)+1
+ COp->getValue().countTrailingZeros()+1);
+ }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ const Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, &ArgType, 1);
+ Value *V = B.CreateCall(F, Op, "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(Type::Int32Ty, 1), "tmp");
+ V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp");
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ return B.CreateSelect(Cond, V, ConstantInt::get(Type::Int32Ty, 0));
+ }
+};
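The select-guarded cttz rewrite, restated as portable host code; a shift loop stands in for llvm.cttz, which is undefined at zero, hence the explicit zero check:

  #include <cstdint>
  #include <cstdio>

  static int FfsViaCttz(uint32_t X) {
    if (X == 0) return 0;                   // the select's false arm
    int Cttz = 0;
    while (!(X & 1)) { X >>= 1; ++Cttz; }   // llvm.cttz stand-in
    return Cttz + 1;                        // ffs(c) -> cttz(c)+1
  }

  int main() {
    std::printf("%d %d %d\n", FfsViaCttz(0), FfsViaCttz(1), FfsViaCttz(0x50)); // 0 1 5
  }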
+
+//===---------------------------------------===//
+// 'isdigit' Optimizations
+
+struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ FT->getParamType(0) != Type::Int32Ty)
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getOperand(1);
+ Op = B.CreateSub(Op, ConstantInt::get(Type::Int32Ty, '0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'isascii' Optimizations
+
+struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ FT->getParamType(0) != Type::Int32Ty)
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getOperand(1);
+ Op = B.CreateICmpULT(Op, ConstantInt::get(Type::Int32Ty, 128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+//===---------------------------------------===//
+// 'abs', 'labs', 'llabs' Optimizations
+
+struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getOperand(1);
+ Value *Pos = B.CreateICmpSGT(Op,ConstantInt::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
+
+
+//===---------------------------------------===//
+// 'toascii' Optimizations
+
+struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != Type::Int32Ty)
+ return 0;
+
+ // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getOperand(1), ConstantInt::get(CI->getType(),0x7F));
+ }
+};
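The three ctype rewrites above (isdigit, isascii, toascii) hold in the C locale; isascii and toascii are POSIX rather than ISO C, so their right-hand sides are checked definitionally here:

  #include <cctype>
  #include <cstdio>

  int main() {
    int Mismatches = 0;
    for (int C = 0; C < 256; ++C) {
      // isdigit(c) -> (c-'0') <u 10, valid in the C locale.
      if ((std::isdigit(C) != 0) != ((unsigned)(C - '0') < 10u))
        ++Mismatches;
      // isascii(c) -> c <u 128 and toascii(c) -> c & 0x7f: two spellings
      // of the same ASCII-range test for non-negative c.
      if (((unsigned)C < 128u) != ((C & 0x7F) == C))
        ++Mismatches;
    }
    std::printf("mismatches: %d\n", Mismatches);  // expect 0
  }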
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Optimizations
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------===//
+// 'printf' Optimizations
+
+struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
+ !(isa<IntegerType>(FT->getReturnType()) ||
+ FT->getReturnType() == Type::VoidTy))
+ return 0;
+
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+ return 0;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 0);
+
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ EmitPutChar(ConstantInt::get(Type::Int32Ty, FormatStr[0]), B);
+ return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr.erase(FormatStr.end()-1);
+ Constant *C = ConstantArray::get(FormatStr, true);
+ C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage,
+ C, "str", Callee->getParent());
+ EmitPutS(C, B);
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(*(i8*)dst)
+ if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
+ isa<IntegerType>(CI->getOperand(2)->getType())) {
+ EmitPutChar(CI->getOperand(2), B);
+ return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 1);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
+ isa<PointerType>(CI->getOperand(2)->getType()) &&
+ CI->use_empty()) {
+ EmitPutS(CI->getOperand(2), B);
+ return CI;
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'sprintf' Optimizations
+
+struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ // Check for a fixed format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ return 0;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumOperands() == 3) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return 0; // we found a format specifier, bail out.
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
+ ConstantInt::get(TD->getIntPtrType(), FormatStr.size()+1),1,B);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+ Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char");
+ Value *Ptr = CastToCStr(CI->getOperand(1), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::Int32Ty, 1), "nul");
+ B.CreateStore(Constant::getNullValue(Type::Int8Ty), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
+
+ Value *Len = EmitStrLen(CI->getOperand(3), B);
+ Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1),
+ "leninc");
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return 0;
+ }
+};
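+
+// Illustrative summary of the SPrintFOpt cases above, with the value each
+// rewritten call folds to:
+//   sprintf(dst, "hi")    -> memcpy(dst, "hi", 3)         folds to 2
+//   sprintf(dst, "%c", c) -> dst[0] = c; dst[1] = '\0'    folds to 1
+//   sprintf(dst, "%s", s) -> memcpy(dst, s, strlen(s)+1)  folds to strlen(s)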
+
+//===---------------------------------------===//
+// 'fwrite' Optimizations
+
+struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Require a pointer, an integer, an integer, a pointer, and an integer
+ // result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<IntegerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getParamType(2)) ||
+ !isa<PointerType>(FT->getParamType(3)) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!SizeC || !CountC) return 0;
+ uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
+ EmitFPutC(Char, CI->getOperand(4), B);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ return 0;
+ }
+};
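+
+// Illustrative summary: fwrite(s, 0, n, F) and fwrite(s, n, 0, F) write zero
+// bytes and fold to 0, while fwrite(s, 1, 1, F) becomes fputc(s[0], F) and
+// folds to 1, i.e. the write is assumed to succeed.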
+
+//===---------------------------------------===//
+// 'fputs' Optimizations
+
+struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Require two pointers. Also, we can't optimize if the return value is used.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !CI->use_empty())
+ return 0;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getOperand(1));
+ if (!Len) return 0;
+ EmitFWrite(CI->getOperand(1), ConstantInt::get(TD->getIntPtrType(), Len-1),
+ CI->getOperand(2), B);
+ return CI; // Known to have no uses (see above).
+ }
+};
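+
+// Note on Len-1 above: GetStringLength, as used here, returns the string
+// length including the nul terminator (or 0 on failure), so Len-1 is
+// strlen(s); e.g. fputs("hi", F) becomes fwrite("hi", 1, 2, F).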
+
+//===---------------------------------------===//
+// 'fprintf' Optimizations
+
+struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder &B) {
+ // Require two fixed pointer parameters and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
+ !isa<PointerType>(FT->getParamType(1)) ||
+ !isa<IntegerType>(FT->getReturnType()))
+ return 0;
+
+ // All the optimizations depend on the format string.
+ std::string FormatStr;
+ if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ return 0;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumOperands() == 3) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return 0; // We found a format specifier.
+
+ EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(),
+ FormatStr.size()),
+ CI->getOperand(1), B);
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> *(i8*)dst = chr
+ if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+ EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) -> fputs(str, F)
+ if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
+ return 0;
+ EmitFPutS(CI->getOperand(3), CI->getOperand(1), B);
+ return CI;
+ }
+ return 0;
+ }
+};
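+
+// Illustrative summary of FPrintFOpt (mirroring PrintFOpt, but with an
+// explicit stream):
+//   fprintf(F, "foo")   -> fwrite("foo", 3, 1, F)   folds to 3
+//   fprintf(F, "%c", c) -> fputc(c, F)              folds to 1
+//   fprintf(F, "%s", s) -> fputs(s, F)              (result must be unused)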
+
+} // end anonymous namespace.
+
+//===----------------------------------------------------------------------===//
+// SimplifyLibCalls Pass Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// This pass optimizes well-known library functions from libc and libm.
+ ///
+ class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass {
+ StringMap<LibCallOptimization*> Optimizations;
+ // Miscellaneous LibCall Optimizations
+ ExitOpt Exit;
+ // String and Memory LibCall Optimizations
+ StrCatOpt StrCat; StrChrOpt StrChr; StrCmpOpt StrCmp; StrNCmpOpt StrNCmp;
+ StrCpyOpt StrCpy; StrLenOpt StrLen; MemCmpOpt MemCmp; MemCpyOpt MemCpy;
+ // Math Library Optimizations
+ PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ // Integer Optimizations
+ FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
+ ToAsciiOpt ToAscii;
+ // Formatting and IO Optimizations
+ SPrintFOpt SPrintF; PrintFOpt PrintF;
+ FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+ public:
+ static char ID; // Pass identification
+ SimplifyLibCalls() : FunctionPass((intptr_t)&ID) {}
+
+ void InitOptimizations();
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetData>();
+ }
+ };
+ char SimplifyLibCalls::ID = 0;
+} // end anonymous namespace.
+
+static RegisterPass<SimplifyLibCalls>
+X("simplify-libcalls", "Simplify well-known library calls");
+
+// Public interface to the Simplify LibCalls pass.
+FunctionPass *llvm::createSimplifyLibCallsPass() {
+ return new SimplifyLibCalls();
+}
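+
+// A usage sketch, not part of the pass itself: clients can schedule it
+// programmatically for a loaded Module *M, roughly as follows (note the
+// TargetData requirement declared in getAnalysisUsage above):
+//
+//   PassManager PM;
+//   PM.add(new TargetData(M));             // supplies the required TargetData
+//   PM.add(createSimplifyLibCallsPass());
+//   PM.run(*M);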
+
+/// InitOptimizations - Populate the Optimizations map with all the
+/// optimizations we know.
+void SimplifyLibCalls::InitOptimizations() {
+ // Miscellaneous LibCall Optimizations
+ Optimizations["exit"] = &Exit;
+
+ // String and Memory LibCall Optimizations
+ Optimizations["strcat"] = &StrCat;
+ Optimizations["strchr"] = &StrChr;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+ Optimizations["strcpy"] = &StrCpy;
+ Optimizations["strlen"] = &StrLen;
+ Optimizations["memcmp"] = &MemCmp;
+ Optimizations["memcpy"] = &MemCpy;
+
+ // Math Library Optimizations
+ Optimizations["powf"] = &Pow;
+ Optimizations["pow"] = &Pow;
+ Optimizations["powl"] = &Pow;
+ Optimizations["exp2l"] = &Exp2;
+ Optimizations["exp2"] = &Exp2;
+ Optimizations["exp2f"] = &Exp2;
+
+#ifdef HAVE_FLOORF
+ Optimizations["floor"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_CEILF
+ Optimizations["ceil"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_ROUNDF
+ Optimizations["round"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_RINTF
+ Optimizations["rint"] = &UnaryDoubleFP;
+#endif
+#ifdef HAVE_NEARBYINTF
+ Optimizations["nearbyint"] = &UnaryDoubleFP;
+#endif
+
+ // Integer Optimizations
+ Optimizations["ffs"] = &FFS;
+ Optimizations["ffsl"] = &FFS;
+ Optimizations["ffsll"] = &FFS;
+ Optimizations["abs"] = &Abs;
+ Optimizations["labs"] = &Abs;
+ Optimizations["llabs"] = &Abs;
+ Optimizations["isdigit"] = &IsDigit;
+ Optimizations["isascii"] = &IsAscii;
+ Optimizations["toascii"] = &ToAscii;
+
+ // Formatting and IO Optimizations
+ Optimizations["sprintf"] = &SPrintF;
+ Optimizations["printf"] = &PrintF;
+ Optimizations["fwrite"] = &FWrite;
+ Optimizations["fputs"] = &FPuts;
+ Optimizations["fprintf"] = &FPrintF;
+}
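+
+// Extending the table is mechanical (a sketch; FooOpt and "foo" are
+// hypothetical names): subclass LibCallOptimization, add a member to
+// SimplifyLibCalls above, and register it here:
+//
+//   struct VISIBILITY_HIDDEN FooOpt : public LibCallOptimization {
+//     virtual Value *CallOptimizer(Function *Callee, CallInst *CI,
+//                                  IRBuilder &B) {
+//       // Validate Callee's prototype, then return the replacement Value,
+//       // CI itself if only the call's side effects matter, or 0 to decline.
+//       return 0;
+//     }
+//   };
+//
+//   Optimizations["foo"] = &Foo;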
+
+
+/// runOnFunction - Top level algorithm.
+///
+bool SimplifyLibCalls::runOnFunction(Function &F) {
+ if (Optimizations.empty())
+ InitOptimizations();
+
+ const TargetData &TD = getAnalysis<TargetData>();
+
+ IRBuilder Builder;
+
+ bool Changed = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Ignore non-calls.
+ CallInst *CI = dyn_cast<CallInst>(I++);
+ if (!CI) continue;
+
+ // Ignore indirect calls and calls to non-external functions.
+ Function *Callee = CI->getCalledFunction();
+ if (Callee == 0 || !Callee->isDeclaration() ||
+ !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
+ continue;
+
+ // Ignore unknown calls.
+ const char *CalleeName = Callee->getNameStart();
+ StringMap<LibCallOptimization*>::iterator OMI =
+ Optimizations.find(CalleeName, CalleeName+Callee->getNameLen());
+ if (OMI == Optimizations.end()) continue;
+
+ // Set the builder to the instruction after the call.
+ Builder.SetInsertPoint(BB, I);
+
+ // Try to optimize this call.
+ Value *Result = OMI->second->OptimizeCall(CI, TD, Builder);
+ if (Result == 0) continue;
+
+ DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI;
+ DOUT << " into: " << *Result << "\n");
+
+ // Something changed!
+ Changed = true;
+ ++NumSimplified;
+
+ // Inspect the instruction after the call (which was potentially just
+ // added) next.
+ I = CI; ++I;
+
+ if (CI != Result && !CI->use_empty()) {
+ CI->replaceAllUsesWith(Result);
+ if (!Result->hasName())
+ Result->takeName(CI);
+ }
+ CI->eraseFromParent();
+ }
+ }
+ return Changed;
+}
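+
+// Usage note: the pass is selected in opt by the name registered above, e.g.
+//   opt -simplify-libcalls in.bc -o out.bc
+// and in assertion-enabled builds the DEBUG output in runOnFunction can be
+// seen by also passing -debug.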
+
+
+// TODO:
+// Additional cases that we need to add to this file:
+//
+// cbrt:
+// * cbrt(expN(x)) -> expN(x/3)
+// * cbrt(sqrt(x)) -> pow(x,1/6)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
+//
+// cos, cosf, cosl:
+// * cos(-x) -> cos(x)
+//
+// exp, expf, expl:
+// * exp(log(x)) -> x
+//
+// log, logf, logl:
+// * log(exp(x)) -> x
+// * log(x**y) -> y*log(x)
+// * log(exp(y)) -> y*log(e)
+// * log(exp2(y)) -> y*log(2)
+// * log(exp10(y)) -> y*log(10)
+// * log(sqrt(x)) -> 0.5*log(x)
+// * log(pow(x,y)) -> y*log(x)
+//
+// lround, lroundf, lroundl:
+// * lround(cnst) -> cnst'
+//
+// memcmp:
+// * memcmp(x,y,l) -> cnst
+// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
+//
+// memmove:
+// * memmove(d,s,l,a) -> memcpy(d,s,l,a)
+// (if s is a global constant array; see the sketch after this list)
+//
+// pow, powf, powl:
+// * pow(exp(x),y) -> exp(x*y)
+// * pow(sqrt(x),y) -> pow(x,y*0.5)
+// * pow(pow(x,y),z)-> pow(x,y*z)
+//
+// puts:
+// * puts("") -> putchar("\n")
+//
+// round, roundf, roundl:
+// * round(cnst) -> cnst'
+//
+// signbit:
+// * signbit(cnst) -> cnst'
+// * signbit(nncst) -> 0 (if nncst is a non-negative constant)
+//
+// sqrt, sqrtf, sqrtl:
+// * sqrt(expN(x)) -> expN(x*0.5)
+// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
+// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
+//
+// stpcpy:
+// * stpcpy(str, "literal") ->
+// llvm.memcpy(str,"literal",strlen("literal")+1,1)
+//
+// strrchr:
+// * strrchr(s,c) -> reverse_offset_of_in(c,s)
+// (if c is a constant integer and s is a constant string)
+// * strrchr(s1,0) -> strchr(s1,0)
+//
+// strncat:
+// * strncat(x,y,0) -> x
+// * strncat(x,y,l) -> x (if strlen(y) = 0)
+// * strncat(x,y,l) -> strcat(x,y) (if y and l are constants and l > strlen(y))
+//
+// strncpy:
+// * strncpy(d,s,0) -> d
+// * strncpy(d,s,l) -> memcpy(d,s,l,1)
+// (if s and l are constants)
+//
+// strpbrk:
+// * strpbrk(s,a) -> offset_in_for(s,a)
+// (if s and a are both constant strings)
+// * strpbrk(s,"") -> 0
+// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
+//
+// strspn, strcspn:
+// * strspn(s,a) -> const_int (if both args are constant)
+// * strspn("",a) -> 0
+// * strspn(s,"") -> 0
+// * strcspn(s,a) -> const_int (if both args are constant)
+// * strcspn("",a) -> 0
+// * strcspn(s,"") -> strlen(a)
+//
+// strstr:
+// * strstr(x,x) -> x
+// * strstr(s1,s2) -> offset_of_s2_in(s1)
+// (if s1 and s2 are constant strings)
+//
+// tan, tanf, tanl:
+// * tan(atan(x)) -> x
+//
+// trunc, truncf, truncl:
+// * trunc(cnst) -> cnst'
+//
+//
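+// As a hedged sketch of how the memmove item above might look in this
+// framework (hypothetical, not in this patch; prototype checks are omitted,
+// and a real version would also look through bitcasts and GEPs to find the
+// underlying global):
+//
+//   struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
+//     virtual Value *CallOptimizer(Function *Callee, CallInst *CI,
+//                                  IRBuilder &B) {
+//       // A constant global source cannot overlap a writable destination,
+//       // so memmove may be lowered to memcpy.
+//       GlobalVariable *GV = dyn_cast<GlobalVariable>(CI->getOperand(2));
+//       if (!GV || !GV->isConstant()) return 0;
+//       EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+//                  1, B);
+//       return CI->getOperand(1);  // memmove returns its dest argument
+//     }
+//   };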