[llvm-branch-commits] [llvm-branch] r167838 [2/2] - in /llvm/branches/R600: ./ autoconf/ cmake/modules/ docs/ docs/CommandGuide/ examples/Fibonacci/ include/llvm-c/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/CodeGen/ include/llvm/CodeGen/PBQP/ include/llvm/Config/ include/llvm/ExecutionEngine/ include/llvm/MC/ include/llvm/MC/MCParser/ include/llvm/Object/ include/llvm/Support/ include/llvm/TableGen/ include/llvm/Target/ include/llvm/Transforms/ include/llvm/Transforms/IPO/ include/llvm/Transforms/Uti...

Tom Stellard thomas.stellard at amd.com
Tue Nov 13 07:21:50 PST 2012


Modified: llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp Tue Nov 13 09:21:47 2012
@@ -158,7 +158,6 @@
   Subtarget = &TM.getSubtarget<X86Subtarget>();
   X86ScalarSSEf64 = Subtarget->hasSSE2();
   X86ScalarSSEf32 = Subtarget->hasSSE1();
-  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
 
   RegInfo = TM.getRegisterInfo();
   TD = getDataLayout();
@@ -180,7 +179,7 @@
     setSchedulingPreference(Sched::ILP);
   else
     setSchedulingPreference(Sched::RegPressure);
-  setStackPointerRegisterToSaveRestore(X86StackPtr);
+  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass i32 with i8 on Atom when compiling with O2
   if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
@@ -948,6 +947,13 @@
     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
 
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
+    // As there is no 64-bit GPR available, we need to build a special custom
+    // sequence to convert from v2i32 to v2f32.
+    if (!Subtarget->is64Bit())
+      setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
+
     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
 
@@ -1067,6 +1073,10 @@
     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
 
+    setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i32, Custom);
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);
+
     setLoadExtAction(ISD::EXTLOAD,              MVT::v4f32, Legal);
 
     setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
@@ -1257,7 +1267,6 @@
   setTargetDAGCombine(ISD::ANY_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::TRUNCATE);
-  setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::SETCC);
   if (Subtarget->is64Bit())
@@ -2174,16 +2183,14 @@
 /// optimization is performed and it is required (FPDiff!=0).
 static SDValue
 EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
-                         SDValue Chain, SDValue RetAddrFrIdx,
-                         bool Is64Bit, int FPDiff, DebugLoc dl) {
+                         SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT,
+                         unsigned SlotSize, int FPDiff, DebugLoc dl) {
   // Store the return address to the appropriate stack slot.
   if (!FPDiff) return Chain;
   // Calculate the new stack slot for the return address.
-  int SlotSize = Is64Bit ? 8 : 4;
   int NewReturnAddrFI =
     MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
-  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
-  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                        MachinePointerInfo::getFixedStack(NewReturnAddrFI),
                        false, false, 0);
@@ -2258,14 +2265,15 @@
   int FPDiff = 0;
   if (isTailCall && !IsSibcall) {
     // Lower arguments at fp - stackoffset + fpdiff.
-    unsigned NumBytesCallerPushed =
-      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+    X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
+    unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
+
     FPDiff = NumBytesCallerPushed - NumBytes;
 
     // Set the delta of movement of the returnaddr stackslot.
     // But only set if delta is greater than previous delta.
-    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
-      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+    if (FPDiff < X86Info->getTCReturnAddrDelta())
+      X86Info->setTCReturnAddrDelta(FPDiff);
   }
 
   if (!IsSibcall)
@@ -2342,7 +2350,8 @@
     } else if (!IsSibcall && (!isTailCall || isByVal)) {
       assert(VA.isMemLoc());
       if (StackPtr.getNode() == 0)
-        StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+        StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+                                      getPointerTy());
       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                              dl, DAG, VA, Flags));
     }
@@ -2430,7 +2439,8 @@
           // Copy relative to framepointer.
           SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
           if (StackPtr.getNode() == 0)
-            StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+            StackPtr = DAG.getCopyFromReg(Chain, dl,
+                                          RegInfo->getStackRegister(),
                                           getPointerTy());
           Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
 
@@ -2452,7 +2462,8 @@
                           &MemOpChains2[0], MemOpChains2.size());
 
     // Store the return address to the appropriate stack slot.
-    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
+                                     getPointerTy(), RegInfo->getSlotSize(),
                                      FPDiff, dl);
   }
 
@@ -2664,7 +2675,7 @@
   unsigned StackAlignment = TFI.getStackAlignment();
   uint64_t AlignMask = StackAlignment - 1;
   int64_t Offset = StackSize;
-  uint64_t SlotSize = TD->getPointerSize(0);
+  unsigned SlotSize = RegInfo->getSlotSize();
   if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
     // Number smaller than 12 so just add the difference.
     Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
@@ -3032,7 +3043,7 @@
 
   if (ReturnAddrIndex == 0) {
     // Set up a frame object for the return address.
-    uint64_t SlotSize = TD->getPointerSize(0);
+    unsigned SlotSize = RegInfo->getSlotSize();
     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
                                                            false);
     FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -4625,7 +4636,6 @@
     MVT ShufVT = V.getValueType().getSimpleVT();
     unsigned NumElems = ShufVT.getVectorNumElements();
     SmallVector<int, 16> ShuffleMask;
-    SDValue ImmN;
     bool IsUnary;
 
     if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
@@ -6429,17 +6439,17 @@
 }
 
 static bool MayFoldVectorLoad(SDValue V) {
-  if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+  while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
     V = V.getOperand(0);
+
   if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
     V = V.getOperand(0);
   if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
       V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
     // BUILD_VECTOR (load), undef
     V = V.getOperand(0);
-  if (MayFoldLoad(V))
-    return true;
-  return false;
+
+  return MayFoldLoad(V);
 }
 
 // FIXME: the version above should always be used. Since there's
@@ -6562,6 +6572,81 @@
                               getShuffleSHUFImmediate(SVOp), DAG);
 }
 
+// Reduce a vector shuffle to zext.
+SDValue
+X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+  // PMOVZX is only available from SSE41.
+  if (!Subtarget->hasSSE41())
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+
+  // Only AVX2 supports 256-bit vector integer extension.
+  if (!Subtarget->hasAVX2() && VT.is256BitVector())
+    return SDValue();
+
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  unsigned NumElems = VT.getVectorNumElements();
+
+  // Extending is a unary operation and the element type of the source vector
+  // must not be equal to or larger than i64.
+  if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
+      VT.getVectorElementType() == MVT::i64)
+    return SDValue();
+
+  // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
+  unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
+  while ((1U << Shift) < NumElems) {
+    if (SVOp->getMaskElt(1U << Shift) == 1)
+      break;
+    Shift += 1;
+    // The maximal ratio is 8, i.e. from i8 to i64.
+    if (Shift > 3)
+      return SDValue();
+  }
+
+  // Check the shuffle mask.
+  unsigned Mask = (1U << Shift) - 1;
+  for (unsigned i = 0; i != NumElems; ++i) {
+    int EltIdx = SVOp->getMaskElt(i);
+    if ((i & Mask) != 0 && EltIdx != -1)
+      return SDValue();
+    if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
+      return SDValue();
+  }
+
+  unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
+  EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+
+  if (!isTypeLegal(NVT))
+    return SDValue();
+
+  // Simplify the operand as it is prepared to be fed into the shuffle.
+  unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
+  if (V1.getOpcode() == ISD::BITCAST &&
+      V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+      V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      V1.getOperand(0)
+        .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+    // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+    SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
+    ConstantSDNode *CIdx =
+      dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
+    // If it's foldable, i.e. a normal load with a single use, let code
+    // selection fold it. Otherwise, shorten the conversion sequence.
+    if (CIdx && CIdx->getZExtValue() == 0 &&
+        (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+      V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+  }
+
+  return DAG.getNode(ISD::BITCAST, DL, VT,
+                     DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
+}
+
 SDValue
 X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
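As an aside on lowerVectorIntExtend above: the mask test can be read in isolation. Here is a standalone C++ sketch (illustration only, not part of the patch; IsZExtShuffleMask is a made-up helper) of the same check over a plain mask vector:

    #include <vector>

    // A shuffle is a zero-extension with ratio (1 << Shift) exactly when every
    // (1 << Shift)-th lane selects the next source element in order and all
    // other lanes are undef (-1), which PMOVZX* fills with zeros.
    static bool IsZExtShuffleMask(const std::vector<int> &Mask, unsigned Shift) {
      unsigned Stride = 1u << Shift;
      for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
        if (i % Stride == 0) {
          if (Mask[i] != int(i >> Shift))   // stride lanes pick element i/Stride
            return false;
        } else if (Mask[i] != -1) {         // in-between lanes must be undef
          return false;
        }
      }
      return true;
    }

For example, with Shift == 2 the v16i8 mask <0,-1,-1,-1,1,-1,-1,-1,2,-1,-1,-1,3,-1,-1,-1> passes the check and corresponds to PMOVZXBD.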
@@ -6592,6 +6677,11 @@
     return PromoteSplat(SVOp, DAG);
   }
 
+  // Check integer expanding shuffles.
+  SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+  if (NewOp.getNode())
+    return NewOp;
+
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
   if (VT == MVT::v8i16  || VT == MVT::v16i8 ||
@@ -7734,7 +7824,7 @@
       IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
                         false, false, false, 0);
 
-    SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize(0)),
+    SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
                                     getPointerTy());
     IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
 
@@ -8009,11 +8099,29 @@
   return Sub;
 }
 
+SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDValue N0 = Op.getOperand(0);
+  EVT SVT = N0.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+
+  assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
+          SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
+         "Custom UINT_TO_FP is not supported!");
+
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+  return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+                     DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+}
+
 SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDValue N0 = Op.getOperand(0);
   DebugLoc dl = Op.getDebugLoc();
 
+  if (Op.getValueType().isVector())
+    return lowerUINT_TO_FP_vec(Op, DAG);
+
   // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
   // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
   // the optimization here.
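For context on lowerUINT_TO_FP_vec above: once a u8/u16 lane has been zero-extended to i32, its sign bit is guaranteed to be clear, so a signed conversion produces the same value as the unsigned one and can use the well-supported CVTDQ2PS path. A scalar C++ sketch of that equivalence (illustration only, not part of the patch):

    #include <cstdint>

    static float U16ToFloatViaSigned(uint16_t U) {
      int32_t Widened = static_cast<int32_t>(U); // zero-extend; sign bit is 0
      return static_cast<float>(Widened);        // signed convert == unsigned convert here
    }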
@@ -8187,6 +8295,30 @@
   }
 }
 
+SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  SDValue In = Op.getOperand(0);
+  EVT SVT = In.getValueType();
+
+  if (!VT.is256BitVector() || !SVT.is128BitVector() ||
+      VT.getVectorNumElements() != SVT.getVectorNumElements())
+    return SDValue();
+
+  assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+
+  // AVX2 has better support for integer extension.
+  if (Subtarget->hasAVX2())
+    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+  SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
+  static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
+  SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
+                           DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0]));
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+}
+
 SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc DL = Op.getDebugLoc();
   EVT VT = Op.getValueType();
@@ -9640,7 +9772,8 @@
     Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
     Flag = Chain.getValue(1);
 
-    Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+    Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+                               SPTy).getValue(1);
 
     SDValue Ops1[2] = { Chain.getValue(0), Chain };
     return DAG.getMergeValues(Ops1, 2, dl);
@@ -10412,21 +10545,21 @@
 
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   DebugLoc dl = Op.getDebugLoc();
+  EVT PtrVT = getPointerTy();
 
   if (Depth > 0) {
     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
     SDValue Offset =
-      DAG.getConstant(TD->getPointerSize(0),
-                      Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
-    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
+      DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
+    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+                       DAG.getNode(ISD::ADD, dl, PtrVT,
                                    FrameAddr, Offset),
                        MachinePointerInfo(), false, false, false, 0);
   }
 
   // Just load the return address.
   SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                      RetAddrFI, MachinePointerInfo(), false, false, false, 0);
 }
 
@@ -10448,7 +10581,7 @@
 
 SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
                                                      SelectionDAG &DAG) const {
-  return DAG.getIntPtrConstant(2*TD->getPointerSize(0));
+  return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
 }
 
 SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -10463,7 +10596,7 @@
   unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
 
   SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
-                                  DAG.getIntPtrConstant(TD->getPointerSize(0)));
+                                  DAG.getIntPtrConstant(RegInfo->getSlotSize()));
   StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
                        false, false, 0);
@@ -11492,6 +11625,7 @@
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
   case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
   case ISD::TRUNCATE:           return lowerTRUNCATE(Op, DAG);
+  case ISD::ZERO_EXTEND:        return lowerZERO_EXTEND(Op, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::FP_TO_UINT:         return LowerFP_TO_UINT(Op, DAG);
   case ISD::FP_EXTEND:          return lowerFP_EXTEND(Op, DAG);
@@ -11625,6 +11759,22 @@
     }
     return;
   }
+  case ISD::UINT_TO_FP: {
+    if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+        N->getValueType(0) != MVT::v2f32)
+      return;
+    SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
+                                 N->getOperand(0));
+    SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+                                     MVT::f64);
+    SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
+    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
+                             DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
+    Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
+    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
+    Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
+    return;
+  }
   case ISD::FP_ROUND: {
     SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
     Results.push_back(V);
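The ISD::UINT_TO_FP expansion above is the classic 2^52 bias trick: OR-ing a 32-bit unsigned value into the low bits of the double whose bit pattern is 0x4330000000000000 (exactly 2^52) yields the double 2^52 + u, and subtracting 2^52 recovers u exactly. A minimal scalar sketch of the same idea (illustration only; UintToDoubleViaBias is a made-up name):

    #include <cstdint>
    #include <cstring>

    static double UintToDoubleViaBias(uint32_t U) {
      uint64_t Bits = 0x4330000000000000ULL | U; // 2^52 with U in the low mantissa bits
      double D;
      std::memcpy(&D, &Bits, sizeof(D));         // reinterpret, like the OR/BITCAST above
      return D - 4503599627370496.0;             // subtract 2^52 -> exact value of U
    }

The vector code does this per lane in v2f64 and then narrows the results to v2f32 with X86ISD::VFPROUND.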
@@ -11825,6 +11975,8 @@
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
   case X86ISD::VSEXT_MOVL:         return "X86ISD::VSEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VZEXT:              return "X86ISD::VZEXT";
+  case X86ISD::VSEXT:              return "X86ISD::VSEXT";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
@@ -14249,6 +14401,14 @@
     return NewOp;
 
   SDValue InputVector = N->getOperand(0);
+  // Detect whether we are trying to convert from mmx to i32 and whether the
+  // bitcast from mmx to v2i32 has a single use.
+  if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
+      InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
+      InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
+    return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(),
+                       N->getValueType(0),
+                       InputVector.getNode()->getOperand(0));
 
   // Only operate on vectors of 4 elements, where the alternative shuffling
   // gets to be more expensive.
@@ -16316,6 +16476,16 @@
   return SDValue();
 }
 
+// Helper function of PerformSETCCCombine. It materializes "setb reg"
+// as "sbb reg,reg", since the latter can be extended without a zext and
+// produces an all-ones bit which is more useful than 0/1 in some cases.
+static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
+  return DAG.getNode(ISD::AND, DL, MVT::i8,
+                     DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+                                 DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS),
+                     DAG.getConstant(1, MVT::i8));
+}
+
 // Optimize  RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
 static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
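A note on the idiom used by MaterializeSETB above and by the COND_A -> COND_B rewrite in the next hunk: "sbb reg,reg" computes reg - reg - CF, so it leaves 0 when the carry flag is clear and all ones when it is set. The all-ones value can serve directly as a mask, or be ANDed with 1 to recover the 0/1 result SETB would have produced. A tiny C++ illustration of the value flow (not part of the patch):

    #include <cstdint>

    static uint8_t SbbLikeSetb(bool CarrySet) {
      uint8_t AllOnesOrZero = CarrySet ? 0xFF : 0x00; // what "sbb %al, %al" leaves behind
      return AllOnesOrZero & 1;                       // the 0/1 value "setb %al" would give
    }

The COND_A flip works because, for an unsigned compare, "a > b" is the same as "b < a", so swapping the SUB operands turns the "above" condition into "below" (carry), which this idiom can then materialize.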
@@ -16324,14 +16494,29 @@
   X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
   SDValue EFLAGS = N->getOperand(1);
 
+  if (CC == X86::COND_A) {
+    // Try to convert COND_A into COND_B in an attempt to facilitate 
+    // materializing "setb reg".
+    //
+    // Do not flip "e > c", where "c" is a constant, because the Cmp
+    // instruction cannot take an immediate as its first operand.
+    //
+    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && 
+        EFLAGS.getValueType().isInteger() &&
+        !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+      SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
+                                   EFLAGS.getNode()->getVTList(),
+                                   EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+      SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+      return MaterializeSETB(DL, NewEFLAGS, DAG);
+    }
+  }
+
   // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
   // a zext and produces an all-ones bit which is more useful than 0/1 in some
   // cases.
   if (CC == X86::COND_B)
-    return DAG.getNode(ISD::AND, DL, MVT::i8,
-                       DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
-                                   DAG.getConstant(CC, MVT::i8), EFLAGS),
-                       DAG.getConstant(1, MVT::i8));
+    return MaterializeSETB(DL, EFLAGS, DAG);
 
   SDValue Flags;
 
@@ -16367,23 +16552,6 @@
   return SDValue();
 }
 
-static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) {
-  SDValue Op0 = N->getOperand(0);
-  EVT InVT = Op0->getValueType(0);
-
-  // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32))
-  if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
-    DebugLoc dl = N->getDebugLoc();
-    MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
-    SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
-    // Notice that we use SINT_TO_FP because we know that the high bits
-    // are zero and SINT_TO_FP is better supported by the hardware.
-    return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
-  }
-
-  return SDValue();
-}
-
 static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
                                         const X86TargetLowering *XTLI) {
   SDValue Op0 = N->getOperand(0);
@@ -16529,6 +16697,21 @@
   return OptimizeConditionalInDecrement(N, DAG);
 }
 
+/// performVZEXTCombine - Performs VZEXT DAG node combines.
+static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const X86Subtarget *Subtarget) {
+  // (vzext (bitcast (vzext x))) -> (vzext x)
+  SDValue In = N->getOperand(0);
+  while (In.getOpcode() == ISD::BITCAST)
+    In = In.getOperand(0);
+
+  if (In.getOpcode() != X86ISD::VZEXT)
+    return SDValue();
+
+  return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -16551,7 +16734,6 @@
   case ISD::XOR:            return PerformXorCombine(N, DAG, DCI, Subtarget);
   case ISD::LOAD:           return PerformLOADCombine(N, DAG, DCI, Subtarget);
   case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
-  case ISD::UINT_TO_FP:     return PerformUINT_TO_FPCombine(N, DAG);
   case ISD::SINT_TO_FP:     return PerformSINT_TO_FPCombine(N, DAG, this);
   case ISD::FADD:           return PerformFADDCombine(N, DAG, Subtarget);
   case ISD::FSUB:           return PerformFSUBCombine(N, DAG, Subtarget);
@@ -16569,6 +16751,7 @@
   case ISD::SETCC:          return PerformISDSETCCCombine(N, DAG);
   case X86ISD::SETCC:       return PerformSETCCCombine(N, DAG, DCI, Subtarget);
   case X86ISD::BRCOND:      return PerformBrCondCombine(N, DAG, DCI, Subtarget);
+  case X86ISD::VZEXT:       return performVZEXTCombine(N, DAG, DCI, Subtarget);
   case X86ISD::SHUFP:       // Handle all target specific shuffles
   case X86ISD::PALIGN:
   case X86ISD::UNPCKH:
@@ -16996,7 +17179,7 @@
     return;
   case 'K':
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-      if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+      if (isInt<8>(C->getSExtValue())) {
         Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
         break;
       }
@@ -17321,3 +17504,207 @@
 
   return Res;
 }
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+struct X86CostTblEntry {
+  int ISD;
+  MVT Type;
+  unsigned Cost;
+};
+
+static int
+FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
+struct X86TypeConversionCostTblEntry {
+  int ISD;
+  MVT Dst;
+  MVT Src;
+  unsigned Cost;
+};
+
+static int
+FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
+                   int ISD, MVT Dst, MVT Src) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
+unsigned
+X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
+                                                     Type *Ty) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
+
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  static const X86CostTblEntry AVX1CostTable[] = {
+    // We don't have to scalarize unsupported ops. We can issue two half-sized
+    // operations and we only need to extract the upper YMM half.
+    // Two ops + 1 extract + 1 insert = 4.
+    { ISD::MUL,     MVT::v8i32,    4 },
+    { ISD::SUB,     MVT::v8i32,    4 },
+    { ISD::ADD,     MVT::v8i32,    4 },
+    { ISD::MUL,     MVT::v4i64,    4 },
+    { ISD::SUB,     MVT::v4i64,    4 },
+    { ISD::ADD,     MVT::v4i64,    4 },
+    };
+
+  // Look for AVX1 lowering tricks.
+  if (ST.hasAVX()) {
+    int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+                          LT.second);
+    if (Idx != -1)
+      return LT.first * AVX1CostTable[Idx].Cost;
+  }
+  // Fallback to the default implementation.
+  return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned
+X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+                                                 unsigned Index) const {
+  assert(Val->isVectorTy() && "This must be a vector type");
+
+  if (Index != -1U) {
+    // Legalize the type.
+    std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val);
+
+    // This type is legalized to a scalar type.
+    if (!LT.second.isVector())
+      return 0;
+
+    // The type may be split. Normalize the index to the new type.
+    unsigned Width = LT.second.getVectorNumElements();
+    Index = Index % Width;
+
+    // Floating point scalars are already located in index #0.
+    if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+      return 0;
+  }
+
+  return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
+                                                          Type *ValTy,
+                                                          Type *CondTy) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
+
+  MVT MTy = LT.second;
+
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  const X86Subtarget &ST =
+  TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  static const X86CostTblEntry SSE42CostTbl[] = {
+    { ISD::SETCC,   MVT::v2f64,   1 },
+    { ISD::SETCC,   MVT::v4f32,   1 },
+    { ISD::SETCC,   MVT::v2i64,   1 },
+    { ISD::SETCC,   MVT::v4i32,   1 },
+    { ISD::SETCC,   MVT::v8i16,   1 },
+    { ISD::SETCC,   MVT::v16i8,   1 },
+  };
+
+  static const X86CostTblEntry AVX1CostTbl[] = {
+    { ISD::SETCC,   MVT::v4f64,   1 },
+    { ISD::SETCC,   MVT::v8f32,   1 },
+    // AVX1 does not support 8-wide integer compare.
+    { ISD::SETCC,   MVT::v4i64,   4 },
+    { ISD::SETCC,   MVT::v8i32,   4 },
+    { ISD::SETCC,   MVT::v16i16,  4 },
+    { ISD::SETCC,   MVT::v32i8,   4 },
+  };
+
+  static const X86CostTblEntry AVX2CostTbl[] = {
+    { ISD::SETCC,   MVT::v4i64,   1 },
+    { ISD::SETCC,   MVT::v8i32,   1 },
+    { ISD::SETCC,   MVT::v16i16,  1 },
+    { ISD::SETCC,   MVT::v32i8,   1 },
+  };
+
+  if (ST.hasSSE42()) {
+    int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * SSE42CostTbl[Idx].Cost;
+  }
+
+  if (ST.hasAVX()) {
+    int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX1CostTbl[Idx].Cost;
+  }
+
+  if (ST.hasAVX2()) {
+    int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX2CostTbl[Idx].Cost;
+  }
+
+  return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
+                                                        Type *Dst,
+                                                        Type *Src) const {
+  int ISD = InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  EVT SrcTy = TLI->getValueType(Src);
+  EVT DstTy = TLI->getValueType(Dst);
+
+  if (!SrcTy.isSimple() || !DstTy.isSimple())
+    return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
+
+  const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
+    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+    { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 1 },
+    { ISD::TRUNCATE,    MVT::v8i16, MVT::v8i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i8,  1 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8,  1 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i8,  1 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8,  1 },
+    { ISD::FP_TO_SINT,  MVT::v8i8, MVT::v8f32,  1 },
+    { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32,  1 },
+    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1,  6 },
+    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1,  9 },
+    { ISD::TRUNCATE,    MVT::v8i32, MVT::v8i64, 3 },
+  };
+
+  if (ST.hasAVX()) {
+    int Idx = FindInConvertTable(AVXConversionTbl,
+                                 array_lengthof(AVXConversionTbl),
+                                 ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+    if (Idx != -1)
+      return AVXConversionTbl[Idx].Cost;
+  }
+
+  return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
+}
+
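A worked example of how the new cost tables are read (the numbers below only restate entries already in the tables): for a v8i32 multiply on an AVX-only target, getTypeLegalizationCost returns LT.first == 1 with LT.second == v8i32, the AVX1CostTable entry { ISD::MUL, MVT::v8i32, 4 } matches, and the reported cost is 1 * 4 = 4. If the IR type were twice as wide, legalization would split it, LT.first would become 2, and the same entry would yield 2 * 4 = 8.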

Modified: llvm/branches/R600/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86ISelLowering.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/R600/lib/Target/X86/X86ISelLowering.h Tue Nov 13 09:21:47 2012
@@ -19,6 +19,7 @@
 #include "X86RegisterInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -142,6 +143,10 @@
       /// mnemonic, so do I; blame Intel.
       MOVDQ2Q,
 
+      /// MMX_MOVD2W - Copies a 32-bit value from the low word of an MMX
+      /// vector to a GPR.
+      MMX_MOVD2W,
+
       /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
       /// i32, corresponds to X86::PEXTRB.
       PEXTRB,
@@ -236,6 +241,12 @@
       // VSEXT_MOVL - Vector move low and sign extend.
       VSEXT_MOVL,
 
+      // VZEXT - Vector integer zero-extend.
+      VZEXT,
+
+      // VSEXT - Vector integer sign-extend.
+      VSEXT,
+
       // VFPEXT - Vector FP extend.
       VFPEXT,
 
@@ -474,10 +485,6 @@
     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                  unsigned JTI, MCContext &Ctx) const;
 
-    /// getStackPtrReg - Return the stack pointer register we are using: either
-    /// ESP or RSP.
-    unsigned getStackPtrReg() const { return X86StackPtr; }
-
     /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
     /// function arguments in the caller parameter area. For X86, aggregates
     /// that contains are placed at 16-byte boundaries while the rest are at
@@ -712,9 +719,6 @@
     const X86RegisterInfo *RegInfo;
     const DataLayout *TD;
 
-    /// X86StackPtr - X86 physical register used as stack ptr.
-    unsigned X86StackPtr;
-
     /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
     /// floating point ops.
     /// When SSE is available, use it for f32 operations.
@@ -795,7 +799,9 @@
     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -832,6 +838,8 @@
 
     SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
+
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,
@@ -939,6 +947,23 @@
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   }
+
+  class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
+  public:
+    explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
+    VectorTargetTransformImpl(TL) {}
+
+    virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+
+    virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                        unsigned Index) const;
+
+    unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                Type *CondTy) const;
+
+    virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                                      Type *Src) const;
+  };
 }
 
 #endif    // X86ISELLOWERING_H

Modified: llvm/branches/R600/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Nov 13 09:21:47 2012
@@ -90,6 +90,14 @@
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
+def X86vzext   : SDNode<"X86ISD::VZEXT",
+                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                              SDTCisInt<0>, SDTCisInt<1>]>>;
+
+def X86vsext   : SDNode<"X86ISD::VSEXT",
+                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                              SDTCisInt<0>, SDTCisInt<1>]>>;
+
 def X86vfpext  : SDNode<"X86ISD::VFPEXT",
                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisFP<0>, SDTCisFP<1>]>>;

Modified: llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp Tue Nov 13 09:21:47 2012
@@ -1122,22 +1122,26 @@
     // FIXME: add AVX 256-bit foldable instructions
 
     // FMA4 foldable patterns
-    { X86::VFMADDSS4rr,       X86::VFMADDSS4mr,        TB_ALIGN_16 },
-    { X86::VFMADDSD4rr,       X86::VFMADDSD4mr,        TB_ALIGN_16 },
+    { X86::VFMADDSS4rr,       X86::VFMADDSS4mr,        0           },
+    { X86::VFMADDSD4rr,       X86::VFMADDSD4mr,        0           },
     { X86::VFMADDPS4rr,       X86::VFMADDPS4mr,        TB_ALIGN_16 },
     { X86::VFMADDPD4rr,       X86::VFMADDPD4mr,        TB_ALIGN_16 },
     { X86::VFMADDPS4rrY,      X86::VFMADDPS4mrY,       TB_ALIGN_32 },
     { X86::VFMADDPD4rrY,      X86::VFMADDPD4mrY,       TB_ALIGN_32 },
+    { X86::VFNMADDSS4rr,      X86::VFNMADDSS4mr,       0           },
+    { X86::VFNMADDSD4rr,      X86::VFNMADDSD4mr,       0           },
     { X86::VFNMADDPS4rr,      X86::VFNMADDPS4mr,       TB_ALIGN_16 },
     { X86::VFNMADDPD4rr,      X86::VFNMADDPD4mr,       TB_ALIGN_16 },
     { X86::VFNMADDPS4rrY,     X86::VFNMADDPS4mrY,      TB_ALIGN_32 },
     { X86::VFNMADDPD4rrY,     X86::VFNMADDPD4mrY,      TB_ALIGN_32 },
-    { X86::VFMSUBSS4rr,       X86::VFMSUBSS4mr,        TB_ALIGN_16 },
-    { X86::VFMSUBSD4rr,       X86::VFMSUBSD4mr,        TB_ALIGN_16 },
+    { X86::VFMSUBSS4rr,       X86::VFMSUBSS4mr,        0           },
+    { X86::VFMSUBSD4rr,       X86::VFMSUBSD4mr,        0           },
     { X86::VFMSUBPS4rr,       X86::VFMSUBPS4mr,        TB_ALIGN_16 },
     { X86::VFMSUBPD4rr,       X86::VFMSUBPD4mr,        TB_ALIGN_16 },
     { X86::VFMSUBPS4rrY,      X86::VFMSUBPS4mrY,       TB_ALIGN_32 },
     { X86::VFMSUBPD4rrY,      X86::VFMSUBPD4mrY,       TB_ALIGN_32 },
+    { X86::VFNMSUBSS4rr,      X86::VFNMSUBSS4mr,       0           },
+    { X86::VFNMSUBSD4rr,      X86::VFNMSUBSD4mr,       0           },
     { X86::VFNMSUBPS4rr,      X86::VFNMSUBPS4mr,       TB_ALIGN_16 },
     { X86::VFNMSUBPD4rr,      X86::VFNMSUBPD4mr,       TB_ALIGN_16 },
     { X86::VFNMSUBPS4rrY,     X86::VFNMSUBPS4mrY,      TB_ALIGN_32 },
@@ -1283,22 +1287,26 @@
     { X86::VFMSUBADDPDr213rY,     X86::VFMSUBADDPDr213mY,     TB_ALIGN_32 },
 
     // FMA4 foldable patterns
-    { X86::VFMADDSS4rr,           X86::VFMADDSS4rm,           TB_ALIGN_16 },
-    { X86::VFMADDSD4rr,           X86::VFMADDSD4rm,           TB_ALIGN_16 },
+    { X86::VFMADDSS4rr,           X86::VFMADDSS4rm,           0           },
+    { X86::VFMADDSD4rr,           X86::VFMADDSD4rm,           0           },
     { X86::VFMADDPS4rr,           X86::VFMADDPS4rm,           TB_ALIGN_16 },
     { X86::VFMADDPD4rr,           X86::VFMADDPD4rm,           TB_ALIGN_16 },
     { X86::VFMADDPS4rrY,          X86::VFMADDPS4rmY,          TB_ALIGN_32 },
     { X86::VFMADDPD4rrY,          X86::VFMADDPD4rmY,          TB_ALIGN_32 },
+    { X86::VFNMADDSS4rr,          X86::VFNMADDSS4rm,          0           },
+    { X86::VFNMADDSD4rr,          X86::VFNMADDSD4rm,          0           },
     { X86::VFNMADDPS4rr,          X86::VFNMADDPS4rm,          TB_ALIGN_16 },
     { X86::VFNMADDPD4rr,          X86::VFNMADDPD4rm,          TB_ALIGN_16 },
     { X86::VFNMADDPS4rrY,         X86::VFNMADDPS4rmY,         TB_ALIGN_32 },
     { X86::VFNMADDPD4rrY,         X86::VFNMADDPD4rmY,         TB_ALIGN_32 },
-    { X86::VFMSUBSS4rr,           X86::VFMSUBSS4rm,           TB_ALIGN_16 },
-    { X86::VFMSUBSD4rr,           X86::VFMSUBSD4rm,           TB_ALIGN_16 },
+    { X86::VFMSUBSS4rr,           X86::VFMSUBSS4rm,           0           },
+    { X86::VFMSUBSD4rr,           X86::VFMSUBSD4rm,           0           },
     { X86::VFMSUBPS4rr,           X86::VFMSUBPS4rm,           TB_ALIGN_16 },
     { X86::VFMSUBPD4rr,           X86::VFMSUBPD4rm,           TB_ALIGN_16 },
     { X86::VFMSUBPS4rrY,          X86::VFMSUBPS4rmY,          TB_ALIGN_32 },
     { X86::VFMSUBPD4rrY,          X86::VFMSUBPD4rmY,          TB_ALIGN_32 },
+    { X86::VFNMSUBSS4rr,          X86::VFNMSUBSS4rm,          0           },
+    { X86::VFNMSUBSD4rr,          X86::VFNMSUBSD4rm,          0           },
     { X86::VFNMSUBPS4rr,          X86::VFNMSUBPS4rm,          TB_ALIGN_16 },
     { X86::VFNMSUBPD4rr,          X86::VFNMSUBPD4rm,          TB_ALIGN_16 },
     { X86::VFNMSUBPS4rrY,         X86::VFNMSUBPS4rmY,         TB_ALIGN_32 },

Modified: llvm/branches/R600/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrMMX.td?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrMMX.td Tue Nov 13 09:21:47 2012
@@ -207,8 +207,14 @@
 let mayStore = 1 in
 def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
                         "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
-def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
-                        "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>;
+
+// Low word of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+                            [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
+                         "movd\t{$src, $dst|$dst, $src}",
+                         [(set GR32:$dst,
+                          (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
 
 let neverHasSideEffects = 1 in
 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),

Modified: llvm/branches/R600/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrSSE.td?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrSSE.td Tue Nov 13 09:21:47 2012
@@ -1302,7 +1302,7 @@
             (VMOVHPSrm VR128:$src1, addr:$src2)>;
 
   // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
-  // is during lowering, where it's not possible to recognize the load fold 
+  // is during lowering, where it's not possible to recognize the load fold
   // cause it has two uses through a bitcast. One use disappears at isel time
   // and the fold opportunity reappears.
   def : Pat<(v2f64 (X86Unpckl VR128:$src1,
@@ -1322,7 +1322,7 @@
 
 let Predicates = [UseSSE2] in {
   // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
-  // is during lowering, where it's not possible to recognize the load fold 
+  // is during lowering, where it's not possible to recognize the load fold
   // cause it has two uses through a bitcast. One use disappears at isel time
   // and the fold opportunity reappears.
   def : Pat<(v2f64 (X86Unpckl VR128:$src1,
@@ -2159,7 +2159,7 @@
 
 // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
 multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
-                            Operand CC, SDNode OpNode, ValueType VT, 
+                            Operand CC, SDNode OpNode, ValueType VT,
                             PatFrag ld_frag, string asm, string asm_alt,
                             OpndItins itins> {
   def rr : SIi8<0xC2, MRMSrcReg,
@@ -2305,7 +2305,7 @@
 
 // sse12_cmp_packed - sse 1 & 2 compare packed instructions
 multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
-                            Operand CC, Intrinsic Int, string asm, 
+                            Operand CC, Intrinsic Int, string asm,
                             string asm_alt, Domain d> {
   def rri : PIi8<0xC2, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
@@ -3293,17 +3293,52 @@
              sse2_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
              sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
 
+/// sse1_fp_unop_rw - SSE1 unops where the vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                               Intrinsic F32Int, OpndItins itins> {
+  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode FR32:$src))]>;
+  // For scalar unary operations, fold a load into the operation
+  // only in OptForSize mode. It eliminates an instruction, but it also
+  // eliminates a whole-register clobber (the load), so it introduces a
+  // partial register update condition.
+  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+            Requires<[UseSSE1, OptForSize]>;
+  let Constraints = "$src1 = $dst" in {
+    def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+                      (ins VR128:$src1, VR128:$src2),
+                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                      [], itins.rr>;
+    def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+                      (ins VR128:$src1, ssmem:$src2),
+                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                      [], itins.rm>;
+  }
+}
+
 // Reciprocal approximations. Note that these typically require refinement
 // in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
-                            SSE_SQRTS>,
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
+                             SSE_SQRTS>,
              sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
              sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
                             SSE_SQRTS>;
-defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
-                            SSE_RCPS>,
+let Predicates = [UseSSE1] in {
+  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+            (RSQRTSSr_Int VR128:$src, VR128:$src)>;
+}
+
+defm RCP   : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
+                             SSE_RCPS>,
              sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
              sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
+let Predicates = [UseSSE1] in {
+  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+            (RCPSSr_Int VR128:$src, VR128:$src)>;
+}
 
 // There is no f64 version of the reciprocal approximation instructions.
 
@@ -5841,6 +5876,100 @@
             (PMOVZXBQrm addr:$src)>;
 }
 
+let Predicates = [HasAVX2] in {
+  def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;
+  def : Pat<(v8i32  (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;
+  def : Pat<(v4i64  (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;
+
+  def : Pat<(v8i32  (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;
+  def : Pat<(v4i64  (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;
+
+  def : Pat<(v4i64  (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;
+
+  def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))),
+            (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))),
+            (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))),
+            (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+  def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))),
+            (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+  def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))),
+            (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+  def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))),
+            (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+}
+
+let Predicates = [HasAVX] in {
+  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;
+
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVZXBWrm addr:$src)>;
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVZXBWrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVZXBDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+            (VPMOVZXBQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVZXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVZXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (VPMOVZXWQrm addr:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (VPMOVZXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (VPMOVZXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+            (VPMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [UseSSE41] in {
+  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;
+
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVZXBWrm addr:$src)>;
+  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVZXBWrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVZXBDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),
+            (PMOVZXBQrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVZXWDrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVZXWDrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+            (PMOVZXWQrm addr:$src)>;
+
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+            (PMOVZXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),
+            (PMOVZXDQrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
+            (PMOVZXDQrm addr:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Extract Instructions
 //===----------------------------------------------------------------------===//

Modified: llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp Tue Nov 13 09:21:47 2012
@@ -229,15 +229,26 @@
 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   bool callsEHReturn = false;
   bool ghcCall = false;
+  bool oclBiCall = false;
+  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
 
   if (MF) {
     callsEHReturn = MF->getMMI().callsEHReturn();
     const Function *F = MF->getFunction();
     ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+    oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
   }
 
   if (ghcCall)
     return CSR_NoRegs_SaveList;
+  if (oclBiCall) {
+    if (HasAVX && IsWin64)
+        return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+    if (HasAVX && Is64Bit)
+        return CSR_64_Intel_OCL_BI_AVX_SaveList;
+    if (!HasAVX && !IsWin64 && Is64Bit)
+        return CSR_64_Intel_OCL_BI_SaveList;
+  }
   if (Is64Bit) {
     if (IsWin64)
       return CSR_Win64_SaveList;
@@ -252,6 +263,16 @@
 
 const uint32_t*
 X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+
+  if (CC == CallingConv::Intel_OCL_BI) {
+    if (IsWin64 && HasAVX)
+      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
+    if (Is64Bit && HasAVX)
+      return CSR_64_Intel_OCL_BI_AVX_RegMask;
+    if (!HasAVX && !IsWin64 && Is64Bit)
+      return CSR_64_Intel_OCL_BI_RegMask;
+  }
   if (CC == CallingConv::GHC)
     return CSR_NoRegs_RegMask;
   if (!Is64Bit)

Modified: llvm/branches/R600/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86TargetMachine.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86TargetMachine.cpp Tue Nov 13 09:21:47 2012
@@ -49,7 +49,7 @@
     TSInfo(*this),
     TLInfo(*this),
     JITInfo(*this),
-    STTI(&TLInfo) {
+    STTI(&TLInfo), VTTI(&TLInfo) {
 }
 
 void X86_64TargetMachine::anchor() { }
@@ -66,7 +66,7 @@
     TSInfo(*this),
     TLInfo(*this),
     JITInfo(*this),
-    STTI(&TLInfo) {
+    STTI(&TLInfo), VTTI(&TLInfo){
 }
 
 /// X86TargetMachine ctor - Create an X86 target.
@@ -80,7 +80,6 @@
   : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
     Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
     FrameLowering(*this, Subtarget),
-    ELFWriterInfo(is64Bit, true),
     InstrItins(Subtarget.getInstrItineraryData()){
   // Determine the PICStyle based on the target selected.
   if (getRelocationModel() == Reloc::Static) {

Modified: llvm/branches/R600/lib/Target/X86/X86TargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86TargetMachine.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86TargetMachine.h (original)
+++ llvm/branches/R600/lib/Target/X86/X86TargetMachine.h Tue Nov 13 09:21:47 2012
@@ -15,7 +15,6 @@
 #define X86TARGETMACHINE_H
 
 #include "X86.h"
-#include "X86ELFWriterInfo.h"
 #include "X86InstrInfo.h"
 #include "X86ISelLowering.h"
 #include "X86FrameLowering.h"
@@ -34,7 +33,6 @@
 class X86TargetMachine : public LLVMTargetMachine {
   X86Subtarget       Subtarget;
   X86FrameLowering   FrameLowering;
-  X86ELFWriterInfo   ELFWriterInfo;
   InstrItineraryData InstrItins;
 
 public:
@@ -63,9 +61,6 @@
   virtual const X86RegisterInfo  *getRegisterInfo() const {
     return &getInstrInfo()->getRegisterInfo();
   }
-  virtual const X86ELFWriterInfo *getELFWriterInfo() const {
-    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
-  }
   virtual const InstrItineraryData *getInstrItineraryData() const {
     return &InstrItins;
   }
@@ -87,7 +82,7 @@
   X86TargetLowering TLInfo;
   X86JITInfo        JITInfo;
   ScalarTargetTransformImpl STTI;
-  VectorTargetTransformImpl VTTI;
+  X86VectorTargetTransformInfo VTTI;
 public:
   X86_32TargetMachine(const Target &T, StringRef TT,
                       StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -124,7 +119,7 @@
   X86TargetLowering TLInfo;
   X86JITInfo        JITInfo;
   ScalarTargetTransformImpl STTI;
-  VectorTargetTransformImpl VTTI;
+  X86VectorTargetTransformInfo VTTI;
 public:
   X86_64TargetMachine(const Target &T, StringRef TT,
                       StringRef CPU, StringRef FS, const TargetOptions &Options,

Modified: llvm/branches/R600/lib/Target/XCore/XCoreTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/XCore/XCoreTargetMachine.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/XCore/XCoreTargetMachine.cpp (original)
+++ llvm/branches/R600/lib/Target/XCore/XCoreTargetMachine.cpp Tue Nov 13 09:21:47 2012
@@ -32,7 +32,7 @@
     InstrInfo(),
     FrameLowering(Subtarget),
     TLInfo(*this),
-    TSInfo(*this), STTI(&TLInfo) {
+    TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) {
 }
 
 namespace {

Modified: llvm/branches/R600/lib/Transforms/IPO/ExtractGV.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/ExtractGV.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/ExtractGV.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/ExtractGV.cpp Tue Nov 13 09:21:47 2012
@@ -51,32 +51,75 @@
       // Visit the GlobalVariables.
       for (Module::global_iterator I = M.global_begin(), E = M.global_end();
            I != E; ++I) {
-        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
-          I->setInitializer(0);
-        } else {
+        bool Delete =
+          deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+        if (!Delete) {
           if (I->hasAvailableExternallyLinkage())
             continue;
           if (I->getName() == "llvm.global_ctors")
             continue;
         }
 
-        if (I->hasLocalLinkage())
+        bool Local = I->hasLocalLinkage();
+        if (Local)
           I->setVisibility(GlobalValue::HiddenVisibility);
-        I->setLinkage(GlobalValue::ExternalLinkage);
+
+        if (Local || Delete)
+          I->setLinkage(GlobalValue::ExternalLinkage);
+
+        if (Delete)
+          I->setInitializer(0);
       }
 
       // Visit the Functions.
       for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) {
-          I->deleteBody();
-        } else {
+        bool Delete =
+          deleteStuff == (bool)Named.count(I) && !I->isDeclaration();
+        if (!Delete) {
           if (I->hasAvailableExternallyLinkage())
             continue;
         }
 
-        if (I->hasLocalLinkage())
+        bool Local = I->hasLocalLinkage();
+        if (Local)
           I->setVisibility(GlobalValue::HiddenVisibility);
-        I->setLinkage(GlobalValue::ExternalLinkage);
+
+        if (Local || Delete)
+          I->setLinkage(GlobalValue::ExternalLinkage);
+
+        if (Delete)
+          I->deleteBody();
+      }
+
+      // Visit the Aliases.
+      for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+           I != E;) {
+        Module::alias_iterator CurI = I;
+        ++I;
+
+        if (CurI->hasLocalLinkage()) {
+          CurI->setVisibility(GlobalValue::HiddenVisibility);
+          CurI->setLinkage(GlobalValue::ExternalLinkage);
+        }
+
+        if (deleteStuff == (bool)Named.count(CurI)) {
+          Type *Ty =  CurI->getType()->getElementType();
+
+          CurI->removeFromParent();
+          llvm::Value *Declaration;
+          if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+            Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+                                           CurI->getName(), &M);
+
+          } else {
+            Declaration =
+              new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+                                 0, CurI->getName());
+
+          }
+          CurI->replaceAllUsesWith(Declaration);
+          delete CurI;
+        }
       }
 
       return true;

Modified: llvm/branches/R600/lib/Transforms/IPO/FunctionAttrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/FunctionAttrs.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/FunctionAttrs.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/FunctionAttrs.cpp Tue Nov 13 09:21:47 2012
@@ -28,9 +28,9 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/UniqueVector.h"
 #include "llvm/Support/InstIterator.h"
 using namespace llvm;
 
@@ -486,13 +486,13 @@
 /// or a pointer that doesn't alias any other pointer visible to the caller.
 bool FunctionAttrs::IsFunctionMallocLike(Function *F,
                               SmallPtrSet<Function*, 8> &SCCNodes) const {
-  UniqueVector<Value *> FlowsToReturn;
+  SmallSetVector<Value *, 8> FlowsToReturn;
   for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
     if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
       FlowsToReturn.insert(Ret->getReturnValue());
 
   for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
-    Value *RetVal = FlowsToReturn[i+1];   // UniqueVector[0] is reserved.
+    Value *RetVal = FlowsToReturn[i];
 
     if (Constant *C = dyn_cast<Constant>(RetVal)) {
       if (!C->isNullValue() && !isa<UndefValue>(C))

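For context on the index change (FlowsToReturn[i+1] becoming FlowsToReturn[i]): UniqueVector hands out 1-based IDs and reserves slot 0, while SmallSetVector is an ordinary 0-based container. A minimal sketch, not from this patch:

  #include "llvm/ADT/SetVector.h"
  #include <cassert>

  void setVectorIndexing() {
    llvm::SmallSetVector<int, 8> Vals;
    Vals.insert(42);
    Vals.insert(7);
    Vals.insert(42);                        // duplicate, ignored
    assert(Vals.size() == 2);
    assert(Vals[0] == 42 && Vals[1] == 7);  // first element lives at index 0
  }
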
Modified: llvm/branches/R600/lib/Transforms/IPO/IPO.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/IPO.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/IPO.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/IPO.cpp Tue Nov 13 09:21:47 2012
@@ -95,7 +95,10 @@
 }
 
 void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
-  unwrap(PM)->add(createInternalizePass(AllButMain != 0));
+  std::vector<const char *> Export;
+  if (AllButMain)
+    Export.push_back("main");
+  unwrap(PM)->add(createInternalizePass(Export));
 }
 
 void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {

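The C binding keeps its old signature; a non-zero AllButMain is now translated into an export list containing just "main". A minimal usage sketch, assuming an existing LLVMPassManagerRef:

  #include "llvm-c/Core.h"
  #include "llvm-c/Transforms/IPO.h"

  // Everything except "main" is internalized when the pass later runs.
  void addInternalize(LLVMPassManagerRef PM) {
    LLVMAddInternalizePass(PM, /*AllButMain=*/1);
  }
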
Modified: llvm/branches/R600/lib/Transforms/IPO/Internalize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/Internalize.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/Internalize.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/Internalize.cpp Tue Nov 13 09:21:47 2012
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass loops over all of the functions in the input module, looking for a
-// main function.  If a main function is found, all other functions and all
-// global variables with initializers are marked as internal.
+// This pass loops over all of the functions and variables in the input module.
+// If the function or variable is not in the list of external names given to
+// the pass, it is marked as internal.
 //
 //===----------------------------------------------------------------------===//
 
@@ -45,12 +45,9 @@
 namespace {
   class InternalizePass : public ModulePass {
     std::set<std::string> ExternalNames;
-    /// If no api symbols were specified and a main function is defined,
-    /// assume the main function is the only API
-    bool AllButMain;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit InternalizePass(bool AllButMain = true);
+    explicit InternalizePass();
     explicit InternalizePass(const std::vector <const char *>& exportList);
     void LoadFile(const char *Filename);
     virtual bool runOnModule(Module &M);
@@ -66,8 +63,8 @@
 INITIALIZE_PASS(InternalizePass, "internalize",
                 "Internalize Global Symbols", false, false)
 
-InternalizePass::InternalizePass(bool AllButMain)
-  : ModulePass(ID), AllButMain(AllButMain){
+InternalizePass::InternalizePass()
+  : ModulePass(ID) {
   initializeInternalizePassPass(*PassRegistry::getPassRegistry());
   if (!APIFile.empty())           // If a filename is specified, use it.
     LoadFile(APIFile.c_str());
@@ -76,7 +73,7 @@
 }
 
 InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
-  : ModulePass(ID), AllButMain(false){
+  : ModulePass(ID){
   initializeInternalizePassPass(*PassRegistry::getPassRegistry());
   for(std::vector<const char *>::const_iterator itr = exportList.begin();
         itr != exportList.end(); itr++) {
@@ -103,23 +100,6 @@
 bool InternalizePass::runOnModule(Module &M) {
   CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
   CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
-  
-  if (ExternalNames.empty()) {
-    // Return if we're not in 'all but main' mode and have no external api
-    if (!AllButMain)
-      return false;
-    // If no list or file of symbols was specified, check to see if there is a
-    // "main" symbol defined in the module.  If so, use it, otherwise do not
-    // internalize the module, it must be a library or something.
-    //
-    Function *MainFunc = M.getFunction("main");
-    if (MainFunc == 0 || MainFunc->isDeclaration())
-      return false;  // No main found, must be a library...
-
-    // Preserve main, internalize all else.
-    ExternalNames.insert(MainFunc->getName());
-  }
-
   bool Changed = false;
 
   // Never internalize functions which code-gen might insert.
@@ -189,8 +169,8 @@
   return Changed;
 }
 
-ModulePass *llvm::createInternalizePass(bool AllButMain) {
-  return new InternalizePass(AllButMain);
+ModulePass *llvm::createInternalizePass() {
+  return new InternalizePass();
 }
 
 ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {

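With the implicit "look for main" behaviour gone, callers that want any symbols preserved must pass an explicit export list. A minimal sketch (the second symbol name is made up for illustration):

  #include "llvm/PassManager.h"
  #include "llvm/Transforms/IPO.h"
  #include <vector>

  void addInternalize(llvm::PassManager &PM) {
    std::vector<const char *> Export;
    Export.push_back("main");      // keep the usual entry point external
    Export.push_back("my_entry");  // hypothetical extra API symbol
    PM.add(llvm::createInternalizePass(Export));
  }
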
Modified: llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp Tue Nov 13 09:21:47 2012
@@ -33,7 +33,11 @@
 using namespace llvm;
 
 static cl::opt<bool>
-RunVectorization("vectorize", cl::desc("Run vectorization passes"));
+RunLoopVectorization("vectorize-loops",
+                     cl::desc("Run the Loop vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
 
 static cl::opt<bool>
 UseGVNAfterVectorization("use-gvn-after-vectorization",
@@ -52,7 +56,8 @@
     DisableSimplifyLibCalls = false;
     DisableUnitAtATime = false;
     DisableUnrollLoops = false;
-    Vectorize = RunVectorization;
+    Vectorize = RunBBVectorization;
+    LoopVectorize = RunLoopVectorization;
 }
 
 PassManagerBuilder::~PassManagerBuilder() {
@@ -185,7 +190,7 @@
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
 
-  if (Vectorize) {
+  if (LoopVectorize) {
     MPM.add(createLoopVectorizePass());
     MPM.add(createLICMPass());
   }
@@ -245,8 +250,11 @@
   // Now that composite has been compiled, scan through the module, looking
   // for a main function.  If main is defined, mark all other functions
   // internal.
-  if (Internalize)
-    PM.add(createInternalizePass(true));
+  if (Internalize) {
+    std::vector<const char*> E;
+    E.push_back("main");
+    PM.add(createInternalizePass(E));
+  }
 
   // Propagate constants at call sites into the functions they call.  This
   // opens opportunities for globalopt (and inlining) by substituting function

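The single -vectorize option is split: -vectorize-loops drives the new loop vectorizer, while -vectorize keeps controlling the BB vectorizer. Clients that build pipelines programmatically can set the corresponding flags directly; a sketch, assuming the new LoopVectorize member is public like the existing Vectorize member:

  #include "llvm/PassManager.h"
  #include "llvm/Transforms/IPO/PassManagerBuilder.h"

  void buildO3Pipeline(llvm::PassManager &MPM) {
    llvm::PassManagerBuilder Builder;
    Builder.OptLevel = 3;
    Builder.LoopVectorize = true;  // run createLoopVectorizePass + LICM
    Builder.Vectorize = false;     // leave the BB vectorizer off
    Builder.populateModulePassManager(MPM);
  }
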
Modified: llvm/branches/R600/lib/Transforms/InstCombine/InstCombine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/InstCombine/InstCombine.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/InstCombine/InstCombine.h (original)
+++ llvm/branches/R600/lib/Transforms/InstCombine/InstCombine.h Tue Nov 13 09:21:47 2012
@@ -367,6 +367,10 @@
 
 
   Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
+
+  /// Descale - Return a value X such that Val = X * Scale, or null if none.  If
+  /// the multiplication is known not to overflow then NoSignedWrap is set.
+  Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
 };
 
       

Modified: llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCasts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
+++ llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCasts.cpp Tue Nov 13 09:21:47 2012
@@ -238,17 +238,20 @@
   // Get the opcodes of the two Cast instructions
   Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
   Instruction::CastOps secondOp = Instruction::CastOps(opcode);
-
+  Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ?
+    TD->getIntPtrType(SrcTy) : 0;
+  Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ?
+    TD->getIntPtrType(MidTy) : 0;
+  Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ?
+    TD->getIntPtrType(DstTy) : 0;
   unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
-                                                DstTy,
-                                  TD ? TD->getIntPtrType(CI->getContext()) : 0);
-  
+                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
+                                                DstIntPtrTy);
+
   // We don't want to form an inttoptr or ptrtoint that converts to an integer
   // type that differs from the pointer size.
-  if ((Res == Instruction::IntToPtr &&
-          (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
-      (Res == Instruction::PtrToInt &&
-          (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
+      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
     Res = 0;
   
   return Instruction::CastOps(Res);
@@ -1293,16 +1296,15 @@
   // If the source integer type is not the intptr_t type for this target, do a
   // trunc or zext to the intptr_t type, then inttoptr of it.  This allows the
   // cast to be exposed to other transforms.
-  unsigned AS = CI.getAddressSpace();
   if (TD) {
     if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
-        TD->getPointerSizeInBits(AS)) {
+        TD->getPointerSizeInBits()) {
       Value *P = Builder->CreateTrunc(CI.getOperand(0),
                                       TD->getIntPtrType(CI.getContext()));
       return new IntToPtrInst(P, CI.getType());
     }
     if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
-        TD->getPointerSizeInBits(AS)) {
+        TD->getPointerSizeInBits()) {
       Value *P = Builder->CreateZExt(CI.getOperand(0),
                                      TD->getIntPtrType(CI.getContext()));
       return new IntToPtrInst(P, CI.getType());
@@ -1369,14 +1371,13 @@
   // If the destination integer type is not the intptr_t type for this target,
   // do a ptrtoint to intptr_t then do a trunc or zext.  This allows the cast
   // to be exposed to other transforms.
-  unsigned AS = CI.getPointerAddressSpace();
   if (TD) {
-    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) {
+    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
       Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
                                          TD->getIntPtrType(CI.getContext()));
       return new TruncInst(P, CI.getType());
     }
-    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) {
+    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
       Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
                                          TD->getIntPtrType(CI.getContext()));
       return new ZExtInst(P, CI.getType());

Modified: llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCompares.cpp Tue Nov 13 09:21:47 2012
@@ -365,12 +365,11 @@
   // order the state machines in complexity of the generated code.
   Value *Idx = GEP->getOperand(2);
 
-  unsigned AS = GEP->getPointerAddressSpace();
   // If the index is larger than the pointer size of the target, truncate the
   // index down like the GEP would do implicitly.  We don't have to do this for
   // an inbounds GEP because the index can't be out of range.
   if (!GEP->isInBounds() &&
-      Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS))
+      Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
     Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
 
   // If the comparison is only true for one or two elements, emit direct
@@ -529,11 +528,10 @@
     }
   }
 
-  unsigned AS = cast<GetElementPtrInst>(GEP)->getPointerAddressSpace();
   // Okay, we know we have a single variable index, which must be a
   // pointer/array/vector index.  If there is no offset, life is simple, return
   // the index.
-  unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
   if (Offset == 0) {
     // Cast to intptrty in case a truncation occurs.  If an extension is needed,
     // we don't need to bother extending: the extension won't affect where the
@@ -1554,8 +1552,7 @@
   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
   // integer type is the same size as the pointer type.
   if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
-      TD->getPointerSizeInBits(
-        cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) ==
+      TD->getPointerSizeInBits() ==
          cast<IntegerType>(DestTy)->getBitWidth()) {
     Value *RHSOp = 0;
     if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
@@ -2898,10 +2895,6 @@
         if (!RHSF)
           break;
 
-        // We can't convert a PPC double double.
-        if (RHSF->getType()->isPPC_FP128Ty())
-          break;
-
         const fltSemantics *Sem;
         // FIXME: This shouldn't be here.
         if (LHSExt->getSrcTy()->isHalfTy())
@@ -2914,6 +2907,8 @@
           Sem = &APFloat::IEEEquad;
         else if (LHSExt->getSrcTy()->isX86_FP80Ty())
           Sem = &APFloat::x87DoubleExtended;
+        else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
+          Sem = &APFloat::PPCDoubleDouble;
         else
           break;
 

Modified: llvm/branches/R600/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/branches/R600/lib/Transforms/InstCombine/InstructionCombining.cpp Tue Nov 13 09:21:47 2012
@@ -805,6 +805,244 @@
   return true;
 }
 
+/// Descale - Return a value X such that Val = X * Scale, or null if none.  If
+/// the multiplication is known not to overflow then NoSignedWrap is set.
+Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
+  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
+  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
+         Scale.getBitWidth() && "Scale not compatible with value!");
+
+  // If Val is zero or Scale is one then Val = Val * Scale.
+  if (match(Val, m_Zero()) || Scale == 1) {
+    NoSignedWrap = true;
+    return Val;
+  }
+
+  // If Scale is zero then it does not divide Val.
+  if (Scale.isMinValue())
+    return 0;
+
+  // Look through chains of multiplications, searching for a constant that is
+  // divisible by Scale.  For example, descaling X*(Y*(Z*4)) by a factor of 4
+  // will find the constant factor 4 and produce X*(Y*Z).  Descaling X*(Y*8) by
+  // a factor of 4 will produce X*(Y*2).  The principle of operation is to bore
+  // down from Val:
+  //
+  //     Val = M1 * X          ||   Analysis starts here and works down
+  //      M1 = M2 * Y          ||   Doesn't descend into terms with more
+  //      M2 =  Z * 4          \/   than one use
+  //
+  // Then to modify a term at the bottom:
+  //
+  //     Val = M1 * X
+  //      M1 =  Z * Y          ||   Replaced M2 with Z
+  //
+  // Then to work back up correcting nsw flags.
+
+  // Op - the term we are currently analyzing.  Starts at Val then drills down.
+  // Replaced with its descaled value before exiting from the drill down loop.
+  Value *Op = Val;
+
+  // Parent - initially null, but after drilling down notes where Op came from.
+  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
+  // 0'th operand of Val.
+  std::pair<Instruction*, unsigned> Parent;
+
+  // RequireNoSignedWrap - Set if the transform requires a descaling at deeper
+  // levels that doesn't overflow.
+  bool RequireNoSignedWrap = false;
+
+  // logScale - log base 2 of the scale.  Negative if not a power of 2.
+  int32_t logScale = Scale.exactLogBase2();
+
+  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
+
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+      // If Op is a constant divisible by Scale then descale to the quotient.
+      APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
+      APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
+      if (!Remainder.isMinValue())
+        // Not divisible by Scale.
+        return 0;
+      // Replace with the quotient in the parent.
+      Op = ConstantInt::get(CI->getType(), Quotient);
+      NoSignedWrap = true;
+      break;
+    }
+
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
+
+      if (BO->getOpcode() == Instruction::Mul) {
+        // Multiplication.
+        NoSignedWrap = BO->hasNoSignedWrap();
+        if (RequireNoSignedWrap && !NoSignedWrap)
+          return 0;
+
+        // There are three cases for multiplication: multiplication by exactly
+        // the scale, multiplication by a constant different to the scale, and
+        // multiplication by something else.
+        Value *LHS = BO->getOperand(0);
+        Value *RHS = BO->getOperand(1);
+
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+          // Multiplication by a constant.
+          if (CI->getValue() == Scale) {
+            // Multiplication by exactly the scale, replace the multiplication
+            // by its left-hand side in the parent.
+            Op = LHS;
+            break;
+          }
+
+          // Otherwise drill down into the constant.
+          if (!Op->hasOneUse())
+            return 0;
+
+          Parent = std::make_pair(BO, 1);
+          continue;
+        }
+
+        // Multiplication by something else. Drill down into the left-hand side
+        // since that's where the reassociate pass puts the good stuff.
+        if (!Op->hasOneUse())
+          return 0;
+
+        Parent = std::make_pair(BO, 0);
+        continue;
+      }
+
+      if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
+          isa<ConstantInt>(BO->getOperand(1))) {
+        // Multiplication by a power of 2.
+        NoSignedWrap = BO->hasNoSignedWrap();
+        if (RequireNoSignedWrap && !NoSignedWrap)
+          return 0;
+
+        Value *LHS = BO->getOperand(0);
+        int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
+          getLimitedValue(Scale.getBitWidth());
+        // Op = LHS << Amt.
+
+        if (Amt == logScale) {
+          // Multiplication by exactly the scale, replace the multiplication
+          // by its left-hand side in the parent.
+          Op = LHS;
+          break;
+        }
+        if (Amt < logScale || !Op->hasOneUse())
+          return 0;
+
+        // Multiplication by more than the scale.  Reduce the multiplying amount
+        // by the scale in the parent.
+        Parent = std::make_pair(BO, 1);
+        Op = ConstantInt::get(BO->getType(), Amt - logScale);
+        break;
+      }
+    }
+
+    if (!Op->hasOneUse())
+      return 0;
+
+    if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
+      if (Cast->getOpcode() == Instruction::SExt) {
+        // Op is sign-extended from a smaller type, descale in the smaller type.
+        unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+        APInt SmallScale = Scale.trunc(SmallSize);
+        // Suppose Op = sext X, and we descale X as Y * SmallScale.  We want to
+        // descale Op as (sext Y) * Scale.  In order to have
+        //   sext (Y * SmallScale) = (sext Y) * Scale
+        // some conditions need to hold however: SmallScale must sign-extend to
+        // Scale and the multiplication Y * SmallScale should not overflow.
+        if (SmallScale.sext(Scale.getBitWidth()) != Scale)
+          // SmallScale does not sign-extend to Scale.
+          return 0;
+        assert(SmallScale.exactLogBase2() == logScale);
+        // Require that Y * SmallScale must not overflow.
+        RequireNoSignedWrap = true;
+
+        // Drill down through the cast.
+        Parent = std::make_pair(Cast, 0);
+        Scale = SmallScale;
+        continue;
+      }
+
+      if (Cast->getOpcode() == Instruction::Trunc) {
+        // Op is truncated from a larger type, descale in the larger type.
+        // Suppose Op = trunc X, and we descale X as Y * sext Scale.  Then
+        //   trunc (Y * sext Scale) = (trunc Y) * Scale
+        // always holds.  However (trunc Y) * Scale may overflow even if
+        // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
+        // from this point up in the expression (see later).
+        if (RequireNoSignedWrap)
+          return 0;
+
+        // Drill down through the cast.
+        unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
+        Parent = std::make_pair(Cast, 0);
+        Scale = Scale.sext(LargeSize);
+        if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
+          logScale = -1;
+        assert(Scale.exactLogBase2() == logScale);
+        continue;
+      }
+    }
+
+    // Unsupported expression, bail out.
+    return 0;
+  }
+
+  // We know that we can successfully descale, so from here on we can safely
+  // modify the IR.  Op holds the descaled version of the deepest term in the
+  // expression.  NoSignedWrap is 'true' if multiplying Op by Scale is known
+  // not to overflow.
+
+  if (!Parent.first)
+    // The expression only had one term.
+    return Op;
+
+  // Rewrite the parent using the descaled version of its operand.
+  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
+  assert(Op != Parent.first->getOperand(Parent.second) &&
+         "Descaling was a no-op?");
+  Parent.first->setOperand(Parent.second, Op);
+  Worklist.Add(Parent.first);
+
+  // Now work back up the expression correcting nsw flags.  The logic is based
+  // on the following observation: if X * Y is known not to overflow as a signed
+  // multiplication, and Y is replaced by a value Z with smaller absolute value,
+  // then X * Z will not overflow as a signed multiplication either.  As we work
+  // our way up, having NoSignedWrap 'true' means that the descaled value at the
+  // current level has strictly smaller absolute value than the original.
+  Instruction *Ancestor = Parent.first;
+  do {
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
+      // If the multiplication wasn't nsw then we can't say anything about the
+      // value of the descaled multiplication, and we have to clear nsw flags
+      // from this point on up.
+      bool OpNoSignedWrap = BO->hasNoSignedWrap();
+      NoSignedWrap &= OpNoSignedWrap;
+      if (NoSignedWrap != OpNoSignedWrap) {
+        BO->setHasNoSignedWrap(NoSignedWrap);
+        Worklist.Add(Ancestor);
+      }
+    } else if (Ancestor->getOpcode() == Instruction::Trunc) {
+      // The fact that the descaled input to the trunc has smaller absolute
+      // value than the original input doesn't tell us anything useful about
+      // the absolute values of the truncations.
+      NoSignedWrap = false;
+    }
+    assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
+           "Failed to keep proper track of nsw flags while drilling down?");
+
+    if (Ancestor == Val)
+      // Got to the top, all done!
+      return Val;
+
+    // Move up one level in the expression.
+    assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
+    Ancestor = Ancestor->use_back();
+  } while (1);
+}
+
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
 
@@ -817,7 +1055,7 @@
   // by multiples of a zero size type with zero.
   if (TD) {
     bool MadeChange = false;
-    Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+    Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType());
 
     gep_type_iterator GTI = gep_type_begin(GEP);
     for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
@@ -836,7 +1074,7 @@
         }
 
       Type *IndexTy = (*I)->getType();
-      if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
+      if (IndexTy != IntPtrTy) {
         // If we are using a wider index than needed for this platform, shrink
         // it to what we need.  If narrower, sign-extend it to what we need.
         // This explicit cast can make subsequent optimizations more obvious.
@@ -855,7 +1093,7 @@
     if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
       return 0;
 
-    // Note that if our source is a gep chain itself that we wait for that
+    // Note that if our source is a gep chain itself then we wait for that
     // chain to be resolved before we perform this transformation.  This
     // avoids us creating a TON of code in some cases.
     if (GEPOperator *SrcGEP =
@@ -987,63 +1225,74 @@
       }
 
       // Transform things like:
+      // %V = mul i64 %N, 4
+      // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
+      // into:  %t1 = getelementptr i32* %arr, i32 %N; bitcast
+      if (TD && ResElTy->isSized() && SrcElTy->isSized()) {
+        // Check that changing the type amounts to dividing the index by a scale
+        // factor.
+        uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
+        uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy);
+        if (ResSize && SrcSize % ResSize == 0) {
+          Value *Idx = GEP.getOperand(1);
+          unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+          uint64_t Scale = SrcSize / ResSize;
+
+          // Earlier transforms ensure that the index has type IntPtrType, which
+          // considerably simplifies the logic by eliminating implicit casts.
+          assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+                 "Index not cast to pointer width?");
+
+          bool NSW;
+          if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+            // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+            // If the multiplication NewIdx * Scale may overflow then the new
+            // GEP may not be "inbounds".
+            Value *NewGEP = GEP.isInBounds() && NSW ?
+              Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
+              Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+            // The NewGEP must be pointer typed, so must the old one -> BitCast
+            return new BitCastInst(NewGEP, GEP.getType());
+          }
+        }
+      }
+
+      // Similarly, transform things like:
       // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
       //   (where tmp = 8*tmp2) into:
       // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
-
-      if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) {
+      if (TD && ResElTy->isSized() && SrcElTy->isSized() &&
+          SrcElTy->isArrayTy()) {
+        // Check that changing to the array element type amounts to dividing the
+        // index by a scale factor.
+        uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
         uint64_t ArrayEltSize =
-            TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
-
-        // Check to see if "tmp" is a scale by a multiple of ArrayEltSize.  We
-        // allow either a mul, shift, or constant here.
-        Value *NewIdx = 0;
-        ConstantInt *Scale = 0;
-        if (ArrayEltSize == 1) {
-          NewIdx = GEP.getOperand(1);
-          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
-        } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
-          NewIdx = ConstantInt::get(CI->getType(), 1);
-          Scale = CI;
-        } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
-          if (Inst->getOpcode() == Instruction::Shl &&
-              isa<ConstantInt>(Inst->getOperand(1))) {
-            ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
-            uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
-            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
-                                     1ULL << ShAmtVal);
-            NewIdx = Inst->getOperand(0);
-          } else if (Inst->getOpcode() == Instruction::Mul &&
-                     isa<ConstantInt>(Inst->getOperand(1))) {
-            Scale = cast<ConstantInt>(Inst->getOperand(1));
-            NewIdx = Inst->getOperand(0);
+          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+        if (ResSize && ArrayEltSize % ResSize == 0) {
+          Value *Idx = GEP.getOperand(1);
+          unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
+          uint64_t Scale = ArrayEltSize / ResSize;
+
+          // Earlier transforms ensure that the index has type IntPtrType, which
+          // considerably simplifies the logic by eliminating implicit casts.
+          assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+                 "Index not cast to pointer width?");
+
+          bool NSW;
+          if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
+            // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
+            // If the multiplication NewIdx * Scale may overflow then the new
+            // GEP may not be "inbounds".
+            Value *Off[2];
+            Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+            Off[1] = NewIdx;
+            Value *NewGEP = GEP.isInBounds() && NSW ?
+              Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
+              Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+            // The NewGEP must be pointer typed, so must the old one -> BitCast
+            return new BitCastInst(NewGEP, GEP.getType());
           }
         }
-
-        // If the index will be to exactly the right offset with the scale taken
-        // out, perform the transformation. Note, we don't know whether Scale is
-        // signed or not. We'll use unsigned version of division/modulo
-        // operation after making sure Scale doesn't have the sign bit set.
-        if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
-            Scale->getZExtValue() % ArrayEltSize == 0) {
-          Scale = ConstantInt::get(Scale->getType(),
-                                   Scale->getZExtValue() / ArrayEltSize);
-          if (Scale->getZExtValue() != 1) {
-            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
-                                                       false /*ZExt*/);
-            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
-          }
-
-          // Insert the new GEP instruction.
-          Value *Idx[2];
-          Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
-          Idx[1] = NewIdx;
-          Value *NewGEP = GEP.isInBounds() ?
-            Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()):
-            Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
-          // The NewGEP must be pointer typed, so must the old one -> BitCast
-          return new BitCastInst(NewGEP, GEP.getType());
-        }
       }
     }
   }

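The size check in the new GEP rewrite is simple; Descale then does the heavy lifting of dividing the index expression. A toy model of the check (plain C++, not LLVM code) with one worked case:

  #include <cassert>
  #include <cstdint>

  // The original element size must be a whole multiple of the element size
  // the GEP currently indexes with; the quotient is the Scale that Descale()
  // tries to factor out of the index.
  bool canRescaleIndex(uint64_t SrcSize, uint64_t ResSize, uint64_t &Scale) {
    if (ResSize == 0 || SrcSize % ResSize != 0)
      return false;
    Scale = SrcSize / ResSize;
    return true;
  }

  int main() {
    uint64_t Scale;
    // i32* viewed through an i8* GEP: SrcSize = 4, ResSize = 1, Scale = 4,
    // so an index of the form %N * 4 can become a plain i32 index of %N.
    assert(canRescaleIndex(4, 1, Scale) && Scale == 4);
    assert(!canRescaleIndex(3, 2, Scale));  // not an integral multiple
    return 0;
  }
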
Modified: llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp Tue Nov 13 09:21:47 2012
@@ -215,6 +215,7 @@
   Function *AsanErrorCallback[2][kNumberOfAccessSizes];
   InlineAsm *EmptyAsm;
   SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
+  SmallSet<GlobalValue*, 32> GlobalsCreatedByAsan;
 };
 
 }  // namespace
@@ -508,6 +509,7 @@
   if (BL->isIn(*G)) return false;
   if (!Ty->isSized()) return false;
   if (!G->hasInitializer()) return false;
+  if (GlobalsCreatedByAsan.count(G)) return false;  // Our own global.
   // Touch only those globals that will not be defined in other modules.
   // Don't handle ODR type linkages since other modules may be built w/o asan.
   if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
@@ -704,7 +706,7 @@
   BL.reset(new BlackList(ClBlackListFile));
 
   C = &(M.getContext());
-  LongSize = TD->getPointerSizeInBits(0);
+  LongSize = TD->getPointerSizeInBits();
   IntptrTy = Type::getIntNTy(*C, LongSize);
   IntptrPtrTy = PointerType::get(IntptrTy, 0);
 
@@ -729,8 +731,9 @@
       std::string FunctionName = std::string(kAsanReportErrorTemplate) +
           (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
       // If we are merging crash callbacks, they have two parameters.
-      AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
-          M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
+      AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
+          checkInterfaceFunction(M.getOrInsertFunction(
+              FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
     }
   }
 
@@ -1090,9 +1093,10 @@
   Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
                                    ConstantInt::get(IntptrTy, LongSize/8));
   BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
-  Value *Description = IRB.CreatePointerCast(
-      createPrivateGlobalForString(*F.getParent(), StackDescription.str()),
-      IntptrTy);
+  GlobalVariable *StackDescriptionGlobal =
+      createPrivateGlobalForString(*F.getParent(), StackDescription.str());
+  GlobalsCreatedByAsan.insert(StackDescriptionGlobal);
+  Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
   IRB.CreateStore(Description, BasePlus1);
 
   // Poison the stack redzones at the entry.

Modified: llvm/branches/R600/lib/Transforms/Instrumentation/BoundsChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Instrumentation/BoundsChecking.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Instrumentation/BoundsChecking.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Instrumentation/BoundsChecking.cpp Tue Nov 13 09:21:47 2012
@@ -143,7 +143,7 @@
   Value *Offset = SizeOffset.second;
   ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
 
-  IntegerType *IntTy = TD->getIntPtrType(Inst->getContext());
+  Type *IntTy = TD->getIntPtrType(Ptr->getType());
   Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
 
   // three checks are required to ensure safety:

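The switch from getIntPtrType(Context) to getIntPtrType(Ptr->getType()) ties the integer width to the pointer operand itself (and so to its address space) rather than always assuming address space 0. A minimal sketch of the pattern, assuming a DataLayout and a pointer-typed Value:

  #include "llvm/DataLayout.h"
  #include "llvm/Value.h"

  llvm::Type *intPtrTypeFor(const llvm::DataLayout &TD, llvm::Value *Ptr) {
    // Width comes from Ptr's own pointer type, not from address space 0.
    return TD.getIntPtrType(Ptr->getType());
  }
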
Modified: llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp Tue Nov 13 09:21:47 2012
@@ -18,7 +18,6 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
@@ -28,6 +27,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Assembly/Writer.h"
@@ -127,7 +127,6 @@
     bool OptimizeSelectInst(SelectInst *SI);
     bool DupRetToEnableTailCallOpts(ReturnInst *RI);
     bool PlaceDbgValues(Function &F);
-    bool ConvertLoadToSwitch(LoadInst *LI);
   };
 }
 
@@ -1291,11 +1290,9 @@
     return OptimizeCmpExpression(CI);
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    bool Changed = false;
     if (TLI)
-      Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
-    Changed |= ConvertLoadToSwitch(LI);
-    return Changed;
+      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+    return false;
   }
 
   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
@@ -1375,109 +1372,3 @@
   }
   return MadeChange;
 }
-
-static bool TargetSupportsJumpTables(const TargetLowering &TLI) {
-  return TLI.supportJumpTables() &&
-          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
-           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
-}
-
-/// ConvertLoadToSwitch - Convert loads from constant lookup tables into
-/// switches. This undos the switch-to-lookup table transformation in
-/// SimplifyCFG for targets where that is inprofitable.
-bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) {
-  // This only applies to targets that don't support jump tables.
-  if (!TLI || TargetSupportsJumpTables(*TLI))
-    return false;
-
-  // FIXME: In the future, it would be desirable to have enough target
-  // information in SimplifyCFG, so we could decide at that stage whether to
-  // transform the switch to a lookup table or not, and this
-  // reverse-transformation could be removed.
-
-  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
-  if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace())
-    return false;
-  if (GEP->getNumIndices() != 2)
-    return false;
-  Value *FirstIndex = GEP->idx_begin()[0];
-  ConstantInt *FirstIndexInt = dyn_cast<ConstantInt>(FirstIndex);
-  if (!FirstIndexInt || !FirstIndexInt->isZero())
-    return false;
-
-  Value *TableIndex = GEP->idx_begin()[1];
-  IntegerType *TableIndexTy = cast<IntegerType>(TableIndex->getType());
-
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
-  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
-    return false;
-
-  Constant *Arr = GV->getInitializer();
-  uint64_t NumElements;
-  if (ConstantArray *CA = dyn_cast<ConstantArray>(Arr))
-    NumElements = CA->getType()->getNumElements();
-  else if (ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(Arr))
-    NumElements = CDA->getNumElements();
-  else
-    return false;
-  if (NumElements < 2)
-    return false;
-
-  // Split the block.
-  BasicBlock *OriginalBB = LI->getParent();
-  BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI);
-
-  // Replace OriginalBB's terminator with a switch.
-  IRBuilder<> Builder(OriginalBB->getTerminator());
-  SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB,
-                                            NumElements - 1);
-  OriginalBB->getTerminator()->eraseFromParent();
-
-  // Count the frequency of each value to decide which to use as default.
-  SmallDenseMap<Constant*, uint64_t> ValueFreq;
-  for (uint64_t I = 0; I < NumElements; ++I)
-    ++ValueFreq[Arr->getAggregateElement(I)];
-  uint64_t MaxCount = 0;
-  Constant *DefaultValue = NULL;
-  for (SmallDenseMap<Constant*, uint64_t>::iterator I = ValueFreq.begin(),
-       E = ValueFreq.end(); I != E; ++I) {
-    if (I->second > MaxCount) {
-      MaxCount = I->second;
-      DefaultValue = I->first;
-    }
-  }
-  assert(DefaultValue && "No values in the array?");
-
-  // Create the phi node in PostSwitchBB, which will replace the load.
-  Builder.SetInsertPoint(PostSwitchBB->begin());
-  PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements);
-  PHI->addIncoming(DefaultValue, OriginalBB);
-
-  // Build basic blocks to target with the switch.
-  for (uint64_t I = 0; I < NumElements; ++I) {
-    Constant *C = Arr->getAggregateElement(I);
-    if (C == DefaultValue) continue; // Already covered by the default case.
-
-    BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(),
-                                        "lookup.bb",
-                                        PostSwitchBB->getParent(),
-                                        PostSwitchBB);
-    Switch->addCase(ConstantInt::get(TableIndexTy, I), BB);
-    Builder.SetInsertPoint(BB);
-    Builder.CreateBr(PostSwitchBB);
-    PHI->addIncoming(C, BB);
-  }
-
-  // Remove the load.
-  LI->replaceAllUsesWith(PHI);
-  LI->eraseFromParent();
-
-  // Clean up.
-  if (GEP->use_empty())
-    GEP->eraseFromParent();
-  if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty())
-    GV->eraseFromParent();
-
-  CurInstIterator = Switch;
-  return true;
-}

Modified: llvm/branches/R600/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/GVN.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/GVN.cpp Tue Nov 13 09:21:47 2012
@@ -746,7 +746,6 @@
   return true;
 }
 
-
 /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
 /// then a load from a must-aliased pointer of a different type, try to coerce
 /// the stored value.  LoadedTy is the type of the load we want to replace and
@@ -769,24 +768,25 @@
   // If the store and reload are the same size, we can always reuse it.
   if (StoreSize == LoadSize) {
     // Pointer to Pointer -> use bitcast.
-    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
+    if (StoredValTy->getScalarType()->isPointerTy() &&
+        LoadedTy->getScalarType()->isPointerTy())
       return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
 
     // Convert source pointers to integers, which can be bitcast.
-    if (StoredValTy->isPointerTy()) {
-      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+    if (StoredValTy->getScalarType()->isPointerTy()) {
+      StoredValTy = TD.getIntPtrType(StoredValTy);
       StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
     }
 
     Type *TypeToCastTo = LoadedTy;
-    if (TypeToCastTo->isPointerTy())
-      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
+    if (TypeToCastTo->getScalarType()->isPointerTy())
+      TypeToCastTo = TD.getIntPtrType(TypeToCastTo);
 
     if (StoredValTy != TypeToCastTo)
       StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
 
     // Cast to pointer if the load needs a pointer type.
-    if (LoadedTy->isPointerTy())
+    if (LoadedTy->getScalarType()->isPointerTy())
       StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
 
     return StoredVal;
@@ -798,8 +798,8 @@
   assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
 
   // Convert source pointers to integers, which can be manipulated.
-  if (StoredValTy->isPointerTy()) {
-    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
+  if (StoredValTy->getScalarType()->isPointerTy()) {
+    StoredValTy = TD.getIntPtrType(StoredValTy);
     StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
   }
 
@@ -824,7 +824,7 @@
     return StoredVal;
 
   // If the result is a pointer, inttoptr.
-  if (LoadedTy->isPointerTy())
+  if (LoadedTy->getScalarType()->isPointerTy())
     return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
 
   // Otherwise, bitcast.
@@ -1019,8 +1019,9 @@
 
   // Compute which bits of the stored value are being used by the load.  Convert
   // to an integer type to start with.
-  if (SrcVal->getType()->isPointerTy())
-    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
+  if (SrcVal->getType()->getScalarType()->isPointerTy())
+    SrcVal = Builder.CreatePtrToInt(SrcVal,
+        TD.getIntPtrType(SrcVal->getType()));
   if (!SrcVal->getType()->isIntegerTy())
     SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
 
@@ -1301,7 +1302,7 @@
   Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
 
   // If new PHI nodes were created, notify alias analysis.
-  if (V->getType()->isPointerTy()) {
+  if (V->getType()->getScalarType()->isPointerTy()) {
     AliasAnalysis *AA = gvn.getAliasAnalysis();
 
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
@@ -1498,7 +1499,7 @@
 
     if (isa<PHINode>(V))
       V->takeName(LI);
-    if (V->getType()->isPointerTy())
+    if (V->getType()->getScalarType()->isPointerTy())
       MD->invalidateCachedPointerInfo(V);
     markInstructionForDeletion(LI);
     ++NumGVNLoad;
@@ -1730,7 +1731,7 @@
   LI->replaceAllUsesWith(V);
   if (isa<PHINode>(V))
     V->takeName(LI);
-  if (V->getType()->isPointerTy())
+  if (V->getType()->getScalarType()->isPointerTy())
     MD->invalidateCachedPointerInfo(V);
   markInstructionForDeletion(LI);
   ++NumPRELoad;
@@ -1857,7 +1858,7 @@
 
       // Replace the load!
       L->replaceAllUsesWith(AvailVal);
-      if (AvailVal->getType()->isPointerTy())
+      if (AvailVal->getType()->getScalarType()->isPointerTy())
         MD->invalidateCachedPointerInfo(AvailVal);
       markInstructionForDeletion(L);
       ++NumGVNLoad;
@@ -1914,7 +1915,7 @@
 
     // Remove it!
     L->replaceAllUsesWith(StoredVal);
-    if (StoredVal->getType()->isPointerTy())
+    if (StoredVal->getType()->getScalarType()->isPointerTy())
       MD->invalidateCachedPointerInfo(StoredVal);
     markInstructionForDeletion(L);
     ++NumGVNLoad;
@@ -1943,7 +1944,7 @@
 
     // Remove it!
     patchAndReplaceAllUsesWith(AvailableVal, L);
-    if (DepLI->getType()->isPointerTy())
+    if (DepLI->getType()->getScalarType()->isPointerTy())
       MD->invalidateCachedPointerInfo(DepLI);
     markInstructionForDeletion(L);
     ++NumGVNLoad;
@@ -2184,7 +2185,7 @@
   // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
   if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
     I->replaceAllUsesWith(V);
-    if (MD && V->getType()->isPointerTy())
+    if (MD && V->getType()->getScalarType()->isPointerTy())
       MD->invalidateCachedPointerInfo(V);
     markInstructionForDeletion(I);
     ++NumGVNSimpl;
@@ -2284,7 +2285,7 @@
 
   // Remove it!
   patchAndReplaceAllUsesWith(repl, I);
-  if (MD && repl->getType()->isPointerTy())
+  if (MD && repl->getType()->getScalarType()->isPointerTy())
     MD->invalidateCachedPointerInfo(repl);
   markInstructionForDeletion(I);
   return true;
@@ -2532,7 +2533,7 @@
       addToLeaderTable(ValNo, Phi, CurrentBlock);
       Phi->setDebugLoc(CurInst->getDebugLoc());
       CurInst->replaceAllUsesWith(Phi);
-      if (Phi->getType()->isPointerTy()) {
+      if (Phi->getType()->getScalarType()->isPointerTy()) {
         // Because we have added a PHI-use of the pointer value, it has now
         // "escaped" from alias analysis' perspective.  We need to inform
         // AA of this.

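The repeated isPointerTy() to getScalarType()->isPointerTy() changes are about vectors of pointers: the vector type itself is not a pointer type, but its scalar element is. A small sketch, not from this patch:

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include <cassert>

  void scalarTypeCheck(llvm::LLVMContext &Ctx) {
    llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(Ctx);
    llvm::Type *V2I8Ptr = llvm::VectorType::get(I8Ptr, 2);  // <2 x i8*>
    assert(!V2I8Ptr->isPointerTy());                  // the old check missed this
    assert(V2I8Ptr->getScalarType()->isPointerTy());  // the new check catches it
    assert(I8Ptr->getScalarType() == I8Ptr);          // scalars are unchanged
  }
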
Modified: llvm/branches/R600/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/IndVarSimplify.cpp Tue Nov 13 09:21:47 2012
@@ -220,8 +220,6 @@
 /// ConvertToSInt - Convert APF to an integer, if possible.
 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
   bool isExact = false;
-  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
-    return false;
   // See if we can convert this to an int64_t
   uint64_t UIntVal;
   if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,

Modified: llvm/branches/R600/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/MemCpyOptimizer.cpp Tue Nov 13 09:21:47 2012
@@ -174,11 +174,10 @@
   // this width can be stored.  If so, check to see whether we will end up
   // actually reducing the number of stores used.
   unsigned Bytes = unsigned(End-Start);
-  unsigned AS = cast<StoreInst>(TheStores[0])->getPointerAddressSpace();
-  unsigned NumPointerStores = Bytes/TD.getPointerSize(AS);
+  unsigned NumPointerStores = Bytes/TD.getPointerSize();
 
   // Assume the remaining bytes if any are done a byte at a time.
-  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(AS);
+  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
 
   // If we will reduce the # stores (according to this heuristic), do the
   // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32

Modified: llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp Tue Nov 13 09:21:47 2012
@@ -444,7 +444,6 @@
 
   bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
     GEPOffset = Offset;
-    unsigned int AS = GEPI.getPointerAddressSpace();
     for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
          GTI != GTE; ++GTI) {
       ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
@@ -474,7 +473,7 @@
         continue;
       }
 
-      APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits(AS));
+      APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
       Index *= APInt(Index.getBitWidth(),
                      TD.getTypeAllocSize(GTI.getIndexedType()));
       Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
@@ -582,7 +581,8 @@
     P.Partitions.push_back(New);
   }
 
-  bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
+  bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
+                         bool IsVolatile) {
     uint64_t Size = TD.getTypeStoreSize(Ty);
 
     // If this memory access can be shown to *statically* extend outside the
@@ -603,7 +603,14 @@
       return true;
     }
 
-    insertUse(I, Offset, Size);
+    // We allow splitting of loads and stores where the type is an integer type
+    // that covers the entire alloca. Such integer loads and stores
+    // often require decomposition into fine grained loads and stores.
+    bool IsSplittable = false;
+    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+      IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+
+    insertUse(I, Offset, Size, IsSplittable);
     return true;
   }
 
@@ -624,7 +631,7 @@
   bool visitLoadInst(LoadInst &LI) {
     assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
            "All simple FCA loads should have been pre-split");
-    return handleLoadOrStore(LI.getType(), LI, Offset);
+    return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
   }
 
   bool visitStoreInst(StoreInst &SI) {
@@ -634,7 +641,7 @@
 
     assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
            "All simple FCA stores should have been pre-split");
-    return handleLoadOrStore(ValOp->getType(), SI, Offset);
+    return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
   }
 
 
@@ -1173,6 +1180,21 @@
       UserTy = LI->getType();
     } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
       UserTy = SI->getValueOperand()->getType();
+    } else {
+      return 0; // Bail if we have weird uses.
+    }
+
+    if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+      // If the type is larger than the partition, skip it. We only encounter
+      // this for split integer operations where we want to use the type of the
+      // entity causing the split.
+      if (ITy->getBitWidth() > (I->EndOffset - I->BeginOffset)*8)
+        continue;
+
+      // If we have found an integer type use covering the alloca, use that
+      // regardless of the other types, as integers are often used for a "bucket
+      // of bits" type.
+      return ITy;
     }
 
     if (Ty && Ty != UserTy)
@@ -2138,6 +2160,14 @@
   if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
     return false;
 
+  // We need to ensure that an integer type with the appropriate bitwidth can
+  // be converted to the alloca type, whatever that is. We don't want to force
+  // the alloca itself to have an integer type if there is a more suitable one.
+  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
+  if (!canConvertValue(TD, AllocaTy, IntTy) ||
+      !canConvertValue(TD, IntTy, AllocaTy))
+    return false;
+
   uint64_t Size = TD.getTypeStoreSize(AllocaTy);
 
   // Check the uses to ensure the uses are (likely) promoteable integer uses.
@@ -2364,8 +2394,7 @@
 
   Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
     assert(BeginOffset >= NewAllocaBeginOffset);
-    unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace();
-    APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset);
+    APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
     return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
   }
 
@@ -2458,6 +2487,49 @@
     assert(OldOp == OldPtr);
     IRBuilder<> IRB(&LI);
 
+    uint64_t Size = EndOffset - BeginOffset;
+    if (Size < TD.getTypeStoreSize(LI.getType())) {
+      assert(!LI.isVolatile());
+      assert(LI.getType()->isIntegerTy() &&
+             "Only integer type loads and stores are split");
+      assert(LI.getType()->getIntegerBitWidth() ==
+             TD.getTypeStoreSizeInBits(LI.getType()) &&
+             "Non-byte-multiple bit width");
+      assert(LI.getType()->getIntegerBitWidth() ==
+             TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
+             "Only alloca-wide loads can be split and recomposed");
+      IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
+      bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+                           canConvertValue(TD, NewAllocaTy, NarrowTy);
+      Value *V;
+      // Move the insertion point just past the load so that we can refer to it.
+      IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+      if (IsConvertable)
+        V = convertValue(TD, IRB,
+                         IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                               getName(".load")),
+                         NarrowTy);
+      else
+        V = IRB.CreateAlignedLoad(
+          getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+          getPartitionTypeAlign(NarrowTy), getName(".load"));
+      // Create a placeholder value with the same type as LI to use as the
+      // basis for the new value. This allows us to replace the uses of LI with
+      // the computed value, and then replace the placeholder with LI, leaving
+      // LI only used for this computation.
+      Value *Placeholder
+        = new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+      V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
+                        getName(".insert"));
+      LI.replaceAllUsesWith(V);
+      Placeholder->replaceAllUsesWith(&LI);
+      delete Placeholder;
+      if (Pass.DeadSplitInsts.insert(&LI))
+        Pass.DeadInsts.push_back(&LI);
+      DEBUG(dbgs() << "          to: " << *V << "\n");
+      return IsConvertable;
+    }
+
     if (VecTy)
       return rewriteVectorizedLoadInst(IRB, LI, OldOp);
     if (IntTy && LI.getType()->isIntegerTy())
@@ -2539,6 +2611,39 @@
     if (VecTy)
       return rewriteVectorizedStoreInst(IRB, SI, OldOp);
     Type *ValueTy = SI.getValueOperand()->getType();
+
+    uint64_t Size = EndOffset - BeginOffset;
+    if (Size < TD.getTypeStoreSize(ValueTy)) {
+      assert(!SI.isVolatile());
+      assert(ValueTy->isIntegerTy() &&
+             "Only integer type loads and stores are split");
+      assert(ValueTy->getIntegerBitWidth() ==
+             TD.getTypeStoreSizeInBits(ValueTy) &&
+             "Non-byte-multiple bit width");
+      assert(ValueTy->getIntegerBitWidth() ==
+             TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+             "Only alloca-wide stores can be split and recomposed");
+      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+      Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
+                                BeginOffset, getName(".extract"));
+      StoreInst *NewSI;
+      bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
+                           canConvertValue(TD, NarrowTy, NewAllocaTy);
+      if (IsConvertable)
+        NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
+                                       &NewAI, NewAI.getAlignment());
+      else
+        NewSI = IRB.CreateAlignedStore(
+          V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
+          getPartitionTypeAlign(NarrowTy));
+      (void)NewSI;
+      if (Pass.DeadSplitInsts.insert(&SI))
+        Pass.DeadInsts.push_back(&SI);
+
+      DEBUG(dbgs() << "          to: " << *NewSI << "\n");
+      return IsConvertable;
+    }
+
     if (IntTy && ValueTy->isIntegerTy())
       return rewriteIntegerStore(IRB, SI);
 
@@ -2686,10 +2791,8 @@
     const AllocaPartitioning::MemTransferOffsets &MTO
       = P.getMemTransferOffsets(II);
 
-    assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!");
-    unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace();
     // Compute the relative offset within the transfer.
-    unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
+    unsigned IntPtrWidth = TD.getPointerSizeInBits();
     APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
                                                        : MTO.SourceBegin));
 
@@ -3173,6 +3276,9 @@
                               uint64_t Offset, uint64_t Size) {
   if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
     return stripAggregateTypeWrapping(TD, Ty);
+  if (Offset > TD.getTypeAllocSize(Ty) ||
+      (TD.getTypeAllocSize(Ty) - Offset) < Size)
+    return 0;
 
   if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
     // We can't partition pointers...
@@ -3464,6 +3570,8 @@
     Instruction *I = DeadInsts.pop_back_val();
     DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
 
+    I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
     for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
       if (Instruction *U = dyn_cast<Instruction>(*OI)) {
         // Zero out the operand and see if it becomes trivially dead.

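The splitting support added to SROA above rests on two ideas: only non-volatile integer accesses that span the whole alloca are marked splittable, and a split load is later recomposed from its partition-sized pieces (which is what the extract/insertInteger helpers do per piece). A standalone sketch of both, assuming byte-sized offsets and a little-endian layout (illustrative names only):

  #include <cstdint>

  // Splittability rule from handleLoadOrStore: an integer access is splittable
  // only when it is non-volatile and its bit width equals the alloca size.
  static bool isSplittableIntegerAccess(bool IsIntegerType, uint64_t BitWidth,
                                        bool IsVolatile,
                                        uint64_t AllocSizeBytes) {
    return IsIntegerType && !IsVolatile && BitWidth == AllocSizeBytes * 8;
  }

  // Recomposing an alloca-wide i64 load that was split into two 32-bit
  // partitions: load each piece and reassemble with shift/or.
  static uint64_t recomposeSplitI64(uint32_t PartAtOffset0,
                                    uint32_t PartAtOffset4) {
    return uint64_t(PartAtOffset0) | (uint64_t(PartAtOffset4) << 32);
  }
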
Modified: llvm/branches/R600/lib/Transforms/Scalar/SimplifyCFGPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/SimplifyCFGPass.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/SimplifyCFGPass.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/SimplifyCFGPass.cpp Tue Nov 13 09:21:47 2012
@@ -35,6 +35,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/TargetTransformInfo.h"
 using namespace llvm;
 
 STATISTIC(NumSimpl, "Number of blocks simplified");
@@ -293,7 +294,8 @@
 
 /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
-static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) {
+static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD,
+                                   const TargetTransformInfo *TTI) {
   bool Changed = false;
   bool LocalChange = true;
   while (LocalChange) {
@@ -302,7 +304,7 @@
     // Loop over all of the basic blocks and remove them if they are unneeded...
     //
     for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
-      if (SimplifyCFG(BBIt++, TD)) {
+      if (SimplifyCFG(BBIt++, TD, TTI)) {
         LocalChange = true;
         ++NumSimpl;
       }
@@ -317,9 +319,11 @@
 //
 bool CFGSimplifyPass::runOnFunction(Function &F) {
   const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+  const TargetTransformInfo *TTI =
+      getAnalysisIfAvailable<TargetTransformInfo>();
   bool EverChanged = removeUnreachableBlocksFromFn(F);
   EverChanged |= mergeEmptyReturnBlocks(F);
-  EverChanged |= iterativelySimplifyCFG(F, TD);
+  EverChanged |= iterativelySimplifyCFG(F, TD, TTI);
 
   // If neither pass changed anything, we're done.
   if (!EverChanged) return false;
@@ -333,7 +337,7 @@
     return true;
 
   do {
-    EverChanged = iterativelySimplifyCFG(F, TD);
+    EverChanged = iterativelySimplifyCFG(F, TD, TTI);
     EverChanged |= removeUnreachableBlocksFromFn(F);
   } while (EverChanged);
 

Modified: llvm/branches/R600/lib/Transforms/Scalar/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/SimplifyLibCalls.cpp Tue Nov 13 09:21:47 2012
@@ -90,22 +90,6 @@
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
-/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
-static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
-      if (IC->isEquality())
-        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
-          if (C->isNullValue())
-            continue;
-    // Unknown instruction.
-    return false;
-  }
-  return true;
-}
-
 static bool CallHasFloatingPointArgument(const CallInst *CI) {
   for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
        it != e; ++it) {
@@ -135,187 +119,6 @@
 
 namespace {
 //===---------------------------------------===//
-// 'stpcpy' Optimizations
-
-struct StpCpyOpt: public LibCallOptimization {
-  bool OptChkCall;  // True if it's optimizing a __stpcpy_chk libcall.
-
-  StpCpyOpt(bool c) : OptChkCall(c) {}
-
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify the "stpcpy" function prototype.
-    unsigned NumParams = OptChkCall ? 3 : 2;
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != NumParams ||
-        FT->getReturnType() != FT->getParamType(0) ||
-        FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != B.getInt8PtrTy())
-      return 0;
-
-    // These optimizations require DataLayout.
-    if (!TD) return 0;
-
-    Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
-    if (Dst == Src) {  // stpcpy(x,x)  -> x+strlen(x)
-      Value *StrLen = EmitStrLen(Src, B, TD, TLI);
-      return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
-    }
-
-    // See if we can get the length of the input string.
-    uint64_t Len = GetStringLength(Src);
-    if (Len == 0) return 0;
-
-    Value *LenV = ConstantInt::get(TD->getIntPtrType(*Context), Len);
-    Value *DstEnd = B.CreateGEP(Dst,
-                                ConstantInt::get(TD->getIntPtrType(*Context),
-                                                 Len - 1));
-
-    // We have enough information to now generate the memcpy call to do the
-    // copy for us.  Make a memcpy to copy the nul byte with align = 1.
-    if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B,
-                                      TD, TLI))
-      B.CreateMemCpy(Dst, Src, LenV, 1);
-    return DstEnd;
-  }
-};
-
-//===---------------------------------------===//
-// 'strncpy' Optimizations
-
-struct StrNCpyOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
-        FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != B.getInt8PtrTy() ||
-        !FT->getParamType(2)->isIntegerTy())
-      return 0;
-
-    Value *Dst = CI->getArgOperand(0);
-    Value *Src = CI->getArgOperand(1);
-    Value *LenOp = CI->getArgOperand(2);
-
-    // See if we can get the length of the input string.
-    uint64_t SrcLen = GetStringLength(Src);
-    if (SrcLen == 0) return 0;
-    --SrcLen;
-
-    if (SrcLen == 0) {
-      // strncpy(x, "", y) -> memset(x, '\0', y, 1)
-      B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
-      return Dst;
-    }
-
-    uint64_t Len;
-    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
-      Len = LengthArg->getZExtValue();
-    else
-      return 0;
-
-    if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
-
-    // These optimizations require DataLayout.
-    if (!TD) return 0;
-
-    // Let strncpy handle the zero padding
-    if (Len > SrcLen+1) return 0;
-
-    // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
-    B.CreateMemCpy(Dst, Src,
-                   ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
-
-    return Dst;
-  }
-};
-
-//===---------------------------------------===//
-// 'strlen' Optimizations
-
-struct StrLenOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 1 ||
-        FT->getParamType(0) != B.getInt8PtrTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
-    Value *Src = CI->getArgOperand(0);
-
-    // Constant folding: strlen("xyz") -> 3
-    if (uint64_t Len = GetStringLength(Src))
-      return ConstantInt::get(CI->getType(), Len-1);
-
-    // strlen(x) != 0 --> *x != 0
-    // strlen(x) == 0 --> *x == 0
-    if (IsOnlyUsedInZeroEqualityComparison(CI))
-      return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
-    return 0;
-  }
-};
-
-
-//===---------------------------------------===//
-// 'strpbrk' Optimizations
-
-struct StrPBrkOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 ||
-        FT->getParamType(0) != B.getInt8PtrTy() ||
-        FT->getParamType(1) != FT->getParamType(0) ||
-        FT->getReturnType() != FT->getParamType(0))
-      return 0;
-
-    StringRef S1, S2;
-    bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
-    bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
-
-    // strpbrk(s, "") -> NULL
-    // strpbrk("", s) -> NULL
-    if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
-      return Constant::getNullValue(CI->getType());
-
-    // Constant folding.
-    if (HasS1 && HasS2) {
-      size_t I = S1.find_first_of(S2);
-      if (I == std::string::npos) // No match.
-        return Constant::getNullValue(CI->getType());
-
-      return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
-    }
-
-    // strpbrk(s, "a") -> strchr(s, 'a')
-    if (TD && HasS2 && S2.size() == 1)
-      return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
-
-    return 0;
-  }
-};
-
-//===---------------------------------------===//
-// 'strto*' Optimizations.  This handles strtol, strtod, strtof, strtoul, etc.
-
-struct StrToOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    FunctionType *FT = Callee->getFunctionType();
-    if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
-        !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy())
-      return 0;
-
-    Value *EndPtr = CI->getArgOperand(1);
-    if (isa<ConstantPointerNull>(EndPtr)) {
-      // With a null EndPtr, this function won't capture the main argument.
-      // It would be readonly too, except that it still may write to errno.
-      CI->addAttribute(1, Attributes::get(Callee->getContext(),
-                                          Attributes::NoCapture));
-    }
-
-    return 0;
-  }
-};
-
-//===---------------------------------------===//
 // 'strspn' Optimizations
 
 struct StrSpnOpt : public LibCallOptimization {
@@ -1234,10 +1037,7 @@
 
     StringMap<LibCallOptimization*> Optimizations;
     // String and Memory LibCall Optimizations
-    StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
-    StrNCpyOpt StrNCpy;
-    StrLenOpt StrLen; StrPBrkOpt StrPBrk;
-    StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
+    StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
     MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
     // Math Library Optimizations
     CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
@@ -1253,8 +1053,8 @@
     bool Modified;  // This is only used by doInitialization.
   public:
     static char ID; // Pass identification
-    SimplifyLibCalls() : FunctionPass(ID), StpCpy(false), StpCpyChk(true),
-                         UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) {
+    SimplifyLibCalls() : FunctionPass(ID), UnaryDoubleFP(false),
+                         UnsafeUnaryDoubleFP(true) {
       initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
     }
     void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
@@ -1305,17 +1105,6 @@
 /// we know.
 void SimplifyLibCalls::InitOptimizations() {
   // String and Memory LibCall Optimizations
-  Optimizations["strncpy"] = &StrNCpy;
-  Optimizations["stpcpy"] = &StpCpy;
-  Optimizations["strlen"] = &StrLen;
-  Optimizations["strpbrk"] = &StrPBrk;
-  Optimizations["strtol"] = &StrTo;
-  Optimizations["strtod"] = &StrTo;
-  Optimizations["strtof"] = &StrTo;
-  Optimizations["strtoul"] = &StrTo;
-  Optimizations["strtoll"] = &StrTo;
-  Optimizations["strtold"] = &StrTo;
-  Optimizations["strtoull"] = &StrTo;
   Optimizations["strspn"] = &StrSpn;
   Optimizations["strcspn"] = &StrCSpn;
   Optimizations["strstr"] = &StrStr;
@@ -1324,9 +1113,6 @@
   Optimizations["memmove"] = &MemMove;
   AddOpt(LibFunc::memset, &MemSet);
 
-  // _chk variants of String and Memory LibCall Optimizations.
-  Optimizations["__stpcpy_chk"] = &StpCpyChk;
-
   // Math Library Optimizations
   Optimizations["cosf"] = &Cos;
   Optimizations["cos"] = &Cos;

Modified: llvm/branches/R600/lib/Transforms/Scalar/TailRecursionElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/TailRecursionElimination.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/TailRecursionElimination.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/TailRecursionElimination.cpp Tue Nov 13 09:21:47 2012
@@ -69,7 +69,6 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 using namespace llvm;
@@ -118,45 +117,34 @@
   return new TailCallElim();
 }
 
-/// CanTRE - Scan the specified basic block for alloca instructions.
-/// If it contains any that are variable-sized or not in the entry block,
-/// returns false.
-static bool CanTRE(AllocaInst *AI) {
-  // Because of PR962, we don't TRE allocas outside the entry block.
-
-  // If this alloca is in the body of the function, or if it is a variable
-  // sized allocation, we cannot tail call eliminate calls marked 'tail'
-  // with this mechanism.
-  BasicBlock *BB = AI->getParent();
-  return BB == &BB->getParent()->getEntryBlock() &&
-         isa<ConstantInt>(AI->getArraySize());
+/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
+/// callees of this function.  We only do very simple analysis right now, this
+/// could be expanded in the future to use mod/ref information for particular
+/// call sites if desired.
+static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
+  // FIXME: do simple 'address taken' analysis.
+  return true;
 }
 
-struct AllocaCaptureTracker : public CaptureTracker {
-  AllocaCaptureTracker() : Captured(false) {}
-
-  void tooManyUses() { Captured = true; }
-
-  bool shouldExplore(Use *U) {
-    Value *V = U->getUser();
-    if (isa<CallInst>(V) || isa<InvokeInst>(V))
-      UsesAlloca.push_back(V);
-
-    return true;
-  }
-
-  bool captured(Use *U) {
-    if (isa<ReturnInst>(U->getUser()))
-      return false;
-
-    Captured = true;
-    return true;
-  }
-
-  SmallVector<WeakVH, 64> UsesAlloca;
-
-  bool Captured;
-};
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
+/// instructions.  If it contains any that might be accessed by calls, return
+/// true.
+static bool CheckForEscapingAllocas(BasicBlock *BB,
+                                    bool &CannotTCETailMarkedCall) {
+  bool RetVal = false;
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+      RetVal |= AllocaMightEscapeToCalls(AI);
+
+      // If this alloca is in the body of the function, or if it is a variable
+      // sized allocation, we cannot tail call eliminate calls marked 'tail'
+      // with this mechanism.
+      if (BB != &BB->getParent()->getEntryBlock() ||
+          !isa<ConstantInt>(AI->getArraySize()))
+        CannotTCETailMarkedCall = true;
+    }
+  return RetVal;
+}
 
 bool TailCallElim::runOnFunction(Function &F) {
   // If this function is a varargs function, we won't be able to PHI the args
@@ -169,34 +157,38 @@
   bool MadeChange = false;
   bool FunctionContainsEscapingAllocas = false;
 
-  // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
+  // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
   // marked with the 'tail' attribute, because doing so would cause the stack
-  // size to increase (real TRE would deallocate variable sized allocas, TRE
+  // size to increase (real TCE would deallocate variable sized allocas, TCE
   // doesn't).
-  bool CanTRETailMarkedCall = true;
+  bool CannotTCETailMarkedCall = false;
 
-  // Find calls that can be marked tail.
-  AllocaCaptureTracker ACT;
-  for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
-        CanTRETailMarkedCall &= CanTRE(AI);
-        PointerMayBeCaptured(AI, &ACT);
-        if (ACT.Captured)
-          return false;
-      }
-    }
+  // Loop over the function, looking for any returning blocks, and keeping track
+  // of whether this function has any non-trivially used allocas.
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
+      break;
+
+    FunctionContainsEscapingAllocas |=
+      CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
   }
 
-  // Second pass, change any tail recursive calls to loops.
+  /// FIXME: The code generator produces really bad code when an 'escaping
+  /// alloca' is changed from being a static alloca to being a dynamic alloca.
+  /// Until this is resolved, disable this transformation if that would ever
+  /// happen.  This bug is PR962.
+  if (FunctionContainsEscapingAllocas)
+    return false;
+
+  // Second pass, change any tail calls to loops.
   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
     if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
       bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
-                                          ArgumentPHIs, !CanTRETailMarkedCall);
+                                          ArgumentPHIs,CannotTCETailMarkedCall);
       if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
         Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
                                           TailCallsAreMarkedTail, ArgumentPHIs,
-                                          !CanTRETailMarkedCall);
+                                          CannotTCETailMarkedCall);
       MadeChange |= Change;
     }
   }
@@ -218,24 +210,21 @@
     }
   }
 
-  // Finally, if this function contains no non-escaping allocas and doesn't
-  // call setjmp, mark all calls in the function as eligible for tail calls
-  // (there is no stack memory for them to access).
-  std::sort(ACT.UsesAlloca.begin(), ACT.UsesAlloca.end());
-
+  // Finally, if this function contains no non-escaping allocas and does not
+  // call setjmp, mark all calls in the function as eligible for tail calls
+  // (there is no stack memory for them to access).
   if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-        if (CallInst *CI = dyn_cast<CallInst>(I))
-          if (!std::binary_search(ACT.UsesAlloca.begin(), ACT.UsesAlloca.end(),
-                                  CI)) {
-            CI->setTailCall();
-            MadeChange = true;
-          }
+        if (CallInst *CI = dyn_cast<CallInst>(I)) {
+          CI->setTailCall();
+          MadeChange = true;
+        }
 
   return MadeChange;
 }
 
+
 /// CanMoveAboveCall - Return true if it is safe to move the specified
 /// instruction from after the call to before the call, assuming that all
 /// instructions between the call and this instruction are movable.

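The reverted analysis is deliberately conservative: every alloca is assumed to escape to callees, and any alloca outside the entry block (or with a variable size) blocks TCE of 'tail'-marked calls. A small C++ illustration of the kind of function the pass now stays away from, based on the comments above (not a test from the commit):

  // Opaque helper the alloca's address reaches; in practice defined elsewhere,
  // given a body here only so the sketch is self-contained.
  static int sum(const int *P, int N) {
    int S = 0;
    for (int I = 0; I < N; ++I) S += P[I];
    return S;
  }

  int wrapper(int N) {
    int Buf[16];                       // static alloca in the entry block
    int M = N < 16 ? N : 16;
    for (int I = 0; I < M; ++I)
      Buf[I] = I;
    // The address of Buf escapes into the call, so AllocaMightEscapeToCalls()
    // returns true and runOnFunction() bails out before marking calls 'tail'.
    return sum(Buf, M);
  }
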
Modified: llvm/branches/R600/lib/Transforms/Utils/LCSSA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/LCSSA.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/LCSSA.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/LCSSA.cpp Tue Nov 13 09:21:47 2012
@@ -53,6 +53,8 @@
 
     // Cached analysis information for the current function.
     DominatorTree *DT;
+    LoopInfo *LI;
+    ScalarEvolution *SE;
     std::vector<BasicBlock*> LoopBlocks;
     PredIteratorCache PredCache;
     Loop *L;
@@ -117,6 +119,8 @@
   L = TheLoop;
   
   DT = &getAnalysis<DominatorTree>();
+  LI = &getAnalysis<LoopInfo>();
+  SE = getAnalysisIfAvailable<ScalarEvolution>();
 
   // Get the set of exiting blocks.
   SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -156,6 +160,12 @@
       MadeChange |= ProcessInstruction(I, ExitBlocks);
     }
   }
+
+  // If we modified the code, remove any caches about the loop from SCEV to
+  // avoid dangling entries.
+  // FIXME: This is a big hammer, can we clear the cache more selectively?
+  if (SE && MadeChange)
+    SE->forgetLoop(L);
   
   assert(L->isLCSSAForm(*DT));
   PredCache.clear();
@@ -245,7 +255,7 @@
     // Remember that this phi makes the value alive in this block.
     SSAUpdate.AddAvailableValue(ExitBB, PN);
   }
-  
+
   // Rewrite all uses outside the loop in terms of the new PHIs we just
   // inserted.
   for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
@@ -260,6 +270,9 @@
 
     if (isa<PHINode>(UserBB->begin()) &&
         isExitBlock(UserBB, ExitBlocks)) {
+      // Tell the VHs that the uses changed. This updates SCEV's caches.
+      if (UsesToRewrite[i]->get()->hasValueHandle())
+        ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
       UsesToRewrite[i]->set(UserBB->begin());
       continue;
     }

Modified: llvm/branches/R600/lib/Transforms/Utils/Local.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/Local.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/Local.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/Local.cpp Tue Nov 13 09:21:47 2012
@@ -806,8 +806,7 @@
                                           const DataLayout *TD) {
   assert(V->getType()->isPointerTy() &&
          "getOrEnforceKnownAlignment expects a pointer!");
-  unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
-  unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64;
+  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
   ComputeMaskedBits(V, KnownZero, KnownOne, TD);
   unsigned TrailZ = KnownZero.countTrailingOnes();

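getOrEnforceKnownAlignment now sizes its known-bits vectors from the default pointer width (or 64 when no DataLayout is available); the alignment it can prove still comes from the trailing bits ComputeMaskedBits shows to be zero. A minimal sketch of that last step (illustrative; the real code also clamps against the requested alignment and other limits):

  // If the low TrailZ bits of a pointer are known to be zero, the pointer is
  // aligned to 1 << TrailZ bytes; e.g. 4 known-zero low bits imply 16-byte
  // alignment. Capped so the shift stays in range.
  static unsigned alignmentFromTrailingZeros(unsigned TrailZ) {
    if (TrailZ > 29)
      TrailZ = 29;
    return 1u << TrailZ;
  }
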
Modified: llvm/branches/R600/lib/Transforms/Utils/LoopSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/LoopSimplify.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/LoopSimplify.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/LoopSimplify.cpp Tue Nov 13 09:21:47 2012
@@ -46,6 +46,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -89,6 +90,7 @@
 
       AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<ScalarEvolution>();
+      AU.addPreserved<DependenceAnalysis>();
       AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
     }
 
@@ -194,6 +196,11 @@
 
           BI->setCondition(ConstantInt::get(Cond->getType(),
                                             !L->contains(BI->getSuccessor(0))));
+
+          // This may make the loop analyzable, force SCEV recomputation.
+          if (SE)
+            SE->forgetLoop(L);
+
           Changed = true;
         }
       }

Modified: llvm/branches/R600/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Tue Nov 13 09:21:47 2012
@@ -212,9 +212,13 @@
     ///
     DenseMap<AllocaInst*, unsigned>  AllocaLookup;
 
-    /// NewPhiNodes - The PhiNodes we're adding.
+    /// NewPhiNodes - The PhiNodes we're adding.  That map is used to simplify
+    /// some Phi nodes as we iterate over it, so it should have deterministic
+    /// iterators.  We could use a MapVector, but since we already maintain a
+    /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is
+    /// more efficient (also supports removal).
     ///
-    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+    DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes;
     
     /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
     /// it corresponds to.
@@ -588,7 +592,11 @@
   while (EliminatedAPHI) {
     EliminatedAPHI = false;
     
-    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
+    // simplify and RAUW them as we go.  If it was not, we could add uses to
+    // the values we replace with in a non deterministic order, thus creating
+    // non deterministic def->use chains.
+    for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
            NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
       PHINode *PN = I->second;
 
@@ -612,7 +620,7 @@
   // have incoming values for all predecessors.  Loop over all PHI nodes we have
   // created, inserting undef values if they are missing any incoming values.
   //
-  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+  for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I =
          NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
     // We want to do this once per basic block.  As such, only process a block
     // when we find the PHI that is the first entry in the block.
@@ -992,7 +1000,7 @@
 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                   unsigned &Version) {
   // Look up the basic-block in question.
-  PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+  PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
 
   // If the BB already has a phi node added for the i'th alloca then we're done!
   if (PN) return false;

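The key change above replaces the BasicBlock pointer in NewPhiNodes with the block's stable number from BBNumbers, so walking the map (and therefore the order in which PHIs are simplified and RAUW'd) no longer depends on pointer values that differ between runs. A compact standalone model of the keying difference (std::map stands in for the pass's DenseMap):

  #include <map>
  #include <utility>

  struct PHINodeStub {};

  // Before: key was (BasicBlock*, AllocaNo); iteration order follows pointer
  // values, which vary from run to run. After: (BBNumber, AllocaNo), so the
  // order is a pure function of the stable numbering and the alloca index.
  typedef std::pair<unsigned, unsigned> StableKey;
  typedef std::map<StableKey, PHINodeStub *> NewPhiNodesModel;

  static PHINodeStub *&lookupPhi(NewPhiNodesModel &M, unsigned BBNumber,
                                 unsigned AllocaNo) {
    return M[StableKey(BBNumber, AllocaNo)];   // mirrors QueuePhiNode's lookup
  }
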
Modified: llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp Tue Nov 13 09:21:47 2012
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "simplifycfg"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/IRBuilder.h"
@@ -39,7 +40,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/NoFolder.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
+#include "llvm/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <algorithm>
 #include <set>
@@ -82,6 +83,7 @@
 
 class SimplifyCFGOpt {
   const DataLayout *const TD;
+  const TargetTransformInfo *const TTI;
 
   Value *isValueEqualityComparison(TerminatorInst *TI);
   BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -101,7 +103,8 @@
   bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
 
 public:
-  explicit SimplifyCFGOpt(const DataLayout *td) : TD(td) {}
+  SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti)
+      : TD(td), TTI(tti) {}
   bool run(BasicBlock *BB);
 };
 }
@@ -392,7 +395,7 @@
 
   // This is some kind of pointer constant. Turn it into a pointer-sized
   // ConstantInt if possible.
-  IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+  IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
 
   // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
   if (isa<ConstantPointerNull>(V))
@@ -3193,26 +3196,95 @@
       isa<UndefValue>(C);
 }
 
-/// GetCaseResulsts - Try to determine the resulting constant values in phi
-/// nodes at the common destination basic block for one of the case
-/// destinations of a switch instruction.
+/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up
+/// its constant value in ConstantPool, returning 0 if it's not there.
+static Constant *LookupConstant(Value *V,
+                         const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+  if (Constant *C = dyn_cast<Constant>(V))
+    return C;
+  return ConstantPool.lookup(V);
+}
+
+/// ConstantFold - Try to fold instruction I into a constant. This works for
+/// simple instructions such as binary operations where both operands are
+/// constant or can be replaced by constants from the ConstantPool. Returns the
+/// resulting constant on success, 0 otherwise.
+static Constant *ConstantFold(Instruction *I,
+                         const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+    Constant *A = LookupConstant(BO->getOperand(0), ConstantPool);
+    if (!A)
+      return 0;
+    Constant *B = LookupConstant(BO->getOperand(1), ConstantPool);
+    if (!B)
+      return 0;
+    return ConstantExpr::get(BO->getOpcode(), A, B);
+  }
+
+  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+    Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+    if (!A)
+      return 0;
+    Constant *B = LookupConstant(I->getOperand(1), ConstantPool);
+    if (!B)
+      return 0;
+    return ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
+  }
+
+  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
+    if (!A)
+      return 0;
+    if (A->isAllOnesValue())
+      return LookupConstant(Select->getTrueValue(), ConstantPool);
+    if (A->isNullValue())
+      return LookupConstant(Select->getFalseValue(), ConstantPool);
+    return 0;
+  }
+
+  if (CastInst *Cast = dyn_cast<CastInst>(I)) {
+    Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
+    if (!A)
+      return 0;
+    return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
+  }
+
+  return 0;
+}
+
+/// GetCaseResults - Try to determine the resulting constant values in phi nodes
+/// at the common destination basic block, *CommonDest, for one of the case
+/// destinations CaseDest corresponding to value CaseVal (0 for the default
+/// case) of a switch instruction SI.
 static bool GetCaseResults(SwitchInst *SI,
+                           ConstantInt *CaseVal,
                            BasicBlock *CaseDest,
                            BasicBlock **CommonDest,
                            SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
   // The block from which we enter the common destination.
   BasicBlock *Pred = SI->getParent();
 
-  // If CaseDest is empty, continue to its successor.
-  if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() &&
-      !isa<PHINode>(CaseDest->begin())) {
-
-    TerminatorInst *Terminator = CaseDest->getTerminator();
-    if (Terminator->getNumSuccessors() != 1)
-      return false;
-
-    Pred = CaseDest;
-    CaseDest = Terminator->getSuccessor(0);
+  // If CaseDest is empty except for some side-effect free instructions through
+  // which we can constant-propagate the CaseVal, continue to its successor.
+  SmallDenseMap<Value*, Constant*> ConstantPool;
+  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+  for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
+       ++I) {
+    if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+      // If the terminator is a simple branch, continue to the next block.
+      if (T->getNumSuccessors() != 1)
+        return false;
+      Pred = CaseDest;
+      CaseDest = T->getSuccessor(0);
+    } else if (isa<DbgInfoIntrinsic>(I)) {
+      // Skip debug intrinsic.
+      continue;
+    } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+      // Instruction is side-effect free and constant.
+      ConstantPool.insert(std::make_pair(I, C));
+    } else {
+      break;
+    }
   }
 
   // If we did not have a CommonDest before, use the current one.
@@ -3229,10 +3301,17 @@
     if (Idx == -1)
       continue;
 
-    Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx));
+    Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx),
+                                        ConstantPool);
     if (!ConstVal)
       return false;
 
+    // Note: If the constant comes from constant-propagating the case value
+    // through the CaseDest basic block, it will be safe to remove the
+    // instructions in that block. They cannot be used (except in the phi nodes
+    // we visit) outside CaseDest, because that block does not dominate its
+    // successor. If it did, we would not be in this phi node.
+
     // Be conservative about which kinds of constants we support.
     if (!ValidLookupTableConstant(ConstVal))
       return false;
@@ -3322,7 +3401,7 @@
     TableContents[Idx] = CaseRes;
 
     if (CaseRes != SingleValue)
-      SingleValue = NULL;
+      SingleValue = 0;
   }
 
   // Fill in any holes in the table with the default result.
@@ -3333,7 +3412,7 @@
     }
 
     if (DefaultValue != SingleValue)
-      SingleValue = NULL;
+      SingleValue = 0;
   }
 
   // If each element in the table contains the same value, we only need to store
@@ -3455,9 +3534,12 @@
 /// replace the switch with lookup tables.
 static bool SwitchToLookupTable(SwitchInst *SI,
                                 IRBuilder<> &Builder,
-                                const DataLayout* TD) {
+                                const DataLayout* TD,
+                                const TargetTransformInfo *TTI) {
   assert(SI->getNumCases() > 1 && "Degenerate switch?");
-  // FIXME: Handle unreachable cases.
+
+  if (TTI && !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
+    return false;
 
   // FIXME: If the switch is too sparse for a lookup table, perhaps we could
   // split off a dense part and build a lookup table for that.
@@ -3480,7 +3562,7 @@
   ConstantInt *MinCaseVal = CI.getCaseValue();
   ConstantInt *MaxCaseVal = CI.getCaseValue();
 
-  BasicBlock *CommonDest = NULL;
+  BasicBlock *CommonDest = 0;
   typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
   SmallDenseMap<PHINode*, ResultListTy> ResultLists;
   SmallDenseMap<PHINode*, Constant*> DefaultResults;
@@ -3497,7 +3579,8 @@
     // Resulting value at phi nodes for this case value.
     typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
     ResultsTy Results;
-    if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results))
+    if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+                        Results))
       return false;
 
     // Append the result from this case to the list for each phi.
@@ -3510,7 +3593,8 @@
 
   // Get the resulting values for the default case.
   SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
-  if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList))
+  if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+                      DefaultResultsList))
     return false;
   for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
     PHINode *PHI = DefaultResultsList[I].first;
@@ -3579,32 +3663,30 @@
 }
 
 bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
-  // If this switch is too complex to want to look at, ignore it.
-  if (!isValueEqualityComparison(SI))
-    return false;
-
   BasicBlock *BB = SI->getParent();
 
-  // If we only have one predecessor, and if it is a branch on this value,
-  // see if that predecessor totally determines the outcome of this switch.
-  if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
-    if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
-      return SimplifyCFG(BB) | true;
+  if (isValueEqualityComparison(SI)) {
+    // If we only have one predecessor, and if it is a branch on this value,
+    // see if that predecessor totally determines the outcome of this switch.
+    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+        return SimplifyCFG(BB) | true;
 
-  Value *Cond = SI->getCondition();
-  if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
-    if (SimplifySwitchOnSelect(SI, Select))
-      return SimplifyCFG(BB) | true;
+    Value *Cond = SI->getCondition();
+    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+      if (SimplifySwitchOnSelect(SI, Select))
+        return SimplifyCFG(BB) | true;
 
-  // If the block only contains the switch, see if we can fold the block
-  // away into any preds.
-  BasicBlock::iterator BBI = BB->begin();
-  // Ignore dbg intrinsics.
-  while (isa<DbgInfoIntrinsic>(BBI))
-    ++BBI;
-  if (SI == &*BBI)
-    if (FoldValueComparisonIntoPredecessors(SI, Builder))
-      return SimplifyCFG(BB) | true;
+    // If the block only contains the switch, see if we can fold the block
+    // away into any preds.
+    BasicBlock::iterator BBI = BB->begin();
+    // Ignore dbg intrinsics.
+    while (isa<DbgInfoIntrinsic>(BBI))
+      ++BBI;
+    if (SI == &*BBI)
+      if (FoldValueComparisonIntoPredecessors(SI, Builder))
+        return SimplifyCFG(BB) | true;
+  }
 
   // Try to transform the switch into an icmp and a branch.
   if (TurnSwitchRangeIntoICmp(SI, Builder))
@@ -3617,7 +3699,7 @@
   if (ForwardSwitchConditionToPHI(SI))
     return SimplifyCFG(BB) | true;
 
-  if (SwitchToLookupTable(SI, Builder, TD))
+  if (SwitchToLookupTable(SI, Builder, TD, TTI))
     return SimplifyCFG(BB) | true;
 
   return false;
@@ -3914,6 +3996,7 @@
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
 /// of the CFG.  It returns true if a modification was made.
 ///
-bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD) {
-  return SimplifyCFGOpt(TD).run(BB);
+bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD,
+                       const TargetTransformInfo *TTI) {
+  return SimplifyCFGOpt(TD, TTI).run(BB);
 }

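Two pieces above work together: GetCaseResults can now constant-propagate the case value through a side-effect-free case block (via the LookupConstant/ConstantFold helpers), and SwitchToLookupTable only fires when the target's shouldBuildLookupTables() hook allows it. A C++ source-level illustration of the kind of switch this enables (assumed behavior; the IR a front end emits for it may differ):

  // Each case either returns a literal or computes a value that folds once the
  // case value is known (x is 2 inside 'case 2'), so every incoming value at
  // the common successor is a constant and the switch can become a load from a
  // small lookup table.
  int classify(int x) {
    switch (x) {
    case 0: return 1;
    case 1: return 3;
    case 2: return x + 4;   // constant-propagates to 6 via ConstantFold
    case 3: return 8;
    default: return 0;
    }
  }
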
Modified: llvm/branches/R600/lib/Transforms/Utils/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/SimplifyLibCalls.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/SimplifyLibCalls.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/SimplifyLibCalls.cpp Tue Nov 13 09:21:47 2012
@@ -64,6 +64,26 @@
 };
 
 //===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+      if (IC->isEquality())
+        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+          if (C->isNullValue())
+            continue;
+    // Unknown instruction.
+    return false;
+  }
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
 // Fortified Library Call Optimizations
 //===----------------------------------------------------------------------===//
 
@@ -188,8 +208,8 @@
       return Src;
 
     // If a) we don't have any length information, or b) we know this will
-    // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
-    // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+    // fit then just lower to a plain strcpy. Otherwise we'll keep our
+    // strcpy_chk call which may fail at runtime if the size is too long.
     // TODO: It might be nice to get a maximum length out of the possible
     // string lengths for varying.
     if (isFoldable(2, 1, true)) {
@@ -213,6 +233,56 @@
   }
 };
 
+struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+    StringRef Name = Callee->getName();
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 3 ||
+        FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+        FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+    if (Dst == Src) {  // stpcpy(x,x)  -> x+strlen(x)
+      Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+      return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+    }
+
+    // If a) we don't have any length information, or b) we know this will
+    // fit then just lower to a plain stpcpy. Otherwise we'll keep our
+    // stpcpy_chk call which may fail at runtime if the size is too long.
+    // TODO: It might be nice to get a maximum length out of the possible
+    // string lengths for varying.
+    if (isFoldable(2, 1, true)) {
+      Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+      return Ret;
+    } else {
+      // Maybe we can still fold __stpcpy_chk to __memcpy_chk.
+      uint64_t Len = GetStringLength(Src);
+      if (Len == 0) return 0;
+
+      // This optimization requires DataLayout.
+      if (!TD) return 0;
+
+      Type *PT = FT->getParamType(0);
+      Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+      Value *DstEnd = B.CreateGEP(Dst,
+                                  ConstantInt::get(TD->getIntPtrType(PT),
+                                                   Len - 1));
+      if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI))
+        return 0;
+      return DstEnd;
+    }
+    return 0;
+  }
+};
+
 struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
   virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     this->CI = CI;
@@ -542,6 +612,166 @@
   }
 };
 
+struct StpCpyOpt: public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "stpcpy" function prototype.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != B.getInt8PtrTy())
+      return 0;
+
+    // These optimizations require DataLayout.
+    if (!TD) return 0;
+
+    Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+    if (Dst == Src) {  // stpcpy(x,x)  -> x+strlen(x)
+      Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+      return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+    }
+
+    // See if we can get the length of the input string.
+    uint64_t Len = GetStringLength(Src);
+    if (Len == 0) return 0;
+
+    Type *PT = FT->getParamType(0);
+    Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+    Value *DstEnd = B.CreateGEP(Dst,
+                                ConstantInt::get(TD->getIntPtrType(PT),
+                                                 Len - 1));
+
+    // We have enough information to now generate the memcpy call to do the
+    // copy for us.  Make a memcpy to copy the nul byte with align = 1.
+    B.CreateMemCpy(Dst, Src, LenV, 1);
+    return DstEnd;
+  }
+};
+
+struct StrNCpyOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
+        !FT->getParamType(2)->isIntegerTy())
+      return 0;
+
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+    Value *LenOp = CI->getArgOperand(2);
+
+    // See if we can get the length of the input string.
+    uint64_t SrcLen = GetStringLength(Src);
+    if (SrcLen == 0) return 0;
+    --SrcLen;
+
+    if (SrcLen == 0) {
+      // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+      B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
+      return Dst;
+    }
+
+    uint64_t Len;
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+      Len = LengthArg->getZExtValue();
+    else
+      return 0;
+
+    if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+
+    // These optimizations require DataLayout.
+    if (!TD) return 0;
+
+    // Let strncpy handle the zero padding
+    if (Len > SrcLen+1) return 0;
+
+    Type *PT = FT->getParamType(0);
+    // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+    B.CreateMemCpy(Dst, Src,
+                   ConstantInt::get(TD->getIntPtrType(PT), Len), 1);
+
+    return Dst;
+  }
+};
+
+struct StrLenOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 1 ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    Value *Src = CI->getArgOperand(0);
+
+    // Constant folding: strlen("xyz") -> 3
+    if (uint64_t Len = GetStringLength(Src))
+      return ConstantInt::get(CI->getType(), Len-1);
+
+    // strlen(x) != 0 --> *x != 0
+    // strlen(x) == 0 --> *x == 0
+    if (isOnlyUsedInZeroEqualityComparison(CI))
+      return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+    return 0;
+  }
+};
+
+struct StrPBrkOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
+        FT->getParamType(1) != FT->getParamType(0) ||
+        FT->getReturnType() != FT->getParamType(0))
+      return 0;
+
+    StringRef S1, S2;
+    bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+    bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+    // strpbrk(s, "") -> NULL
+    // strpbrk("", s) -> NULL
+    if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding.
+    if (HasS1 && HasS2) {
+      size_t I = S1.find_first_of(S2);
+      if (I == std::string::npos) // No match.
+        return Constant::getNullValue(CI->getType());
+
+      return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+    }
+
+    // strpbrk(s, "a") -> strchr(s, 'a')
+    if (TD && HasS2 && S2.size() == 1)
+      return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
+
+    return 0;
+  }
+};
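The strpbrk folding mirrors StringRef::find_first_of: with both operands
constant, the result is the first index in the first string of any character
from the second, and a one-character set degenerates to strchr. An illustrative
check of those equivalences against the C library (made-up strings):

    #include <cassert>
    #include <cstring>
    #include <string>

    int main() {
      // Constant folding: the answer is index find_first_of(S2) into S1.
      const char *S1 = "hello", *S2 = "lo";
      size_t I = std::string(S1).find_first_of(S2);
      assert(std::strpbrk(S1, S2) == S1 + I);   // the pass emits GEP(S1, I)

      // Single-character set: strpbrk(s, "a") == strchr(s, 'a').
      const char *T = "banana";
      assert(std::strpbrk(T, "a") == std::strchr(T, 'a'));

      // Either operand empty: the result is a null pointer.
      assert(std::strpbrk("", "xyz") == nullptr);
      assert(std::strpbrk("xyz", "") == nullptr);
      return 0;
    }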
+
+struct StrToOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+        !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy())
+      return 0;
+
+    Value *EndPtr = CI->getArgOperand(1);
+    if (isa<ConstantPointerNull>(EndPtr)) {
+      // With a null EndPtr, this function won't capture the main argument.
+      // It would be readonly too, except that it still may write to errno.
+      CI->addAttribute(1, Attributes::get(Callee->getContext(),
+                                          Attributes::NoCapture));
+    }
+
+    return 0;
+  }
+};
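The strto* handling only adds an attribute: with a null end pointer the callee
has nowhere to store the input pointer, so the argument cannot be captured;
with a non-null end pointer the pointer does escape. A small sketch of the two
call shapes (illustrative values):

    #include <cassert>
    #include <cstdlib>

    int main() {
      const char *Digits = "42abc";

      // Null EndPtr: the input pointer is only read, never stored anywhere the
      // caller can see, which is what justifies the NoCapture attribute.
      long A = std::strtol(Digits, nullptr, 10);

      // Non-null EndPtr: the pointer escapes, it is written back to *EndPtr.
      char *End = nullptr;
      long B = std::strtol(Digits, &End, 10);

      assert(A == 42 && B == 42 && End == Digits + 2);
      return 0;
    }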
+
 } // End anonymous namespace.
 
 namespace llvm {
@@ -556,6 +786,7 @@
   MemMoveChkOpt MemMoveChk;
   MemSetChkOpt MemSetChk;
   StrCpyChkOpt StrCpyChk;
+  StpCpyChkOpt StpCpyChk;
   StrNCpyChkOpt StrNCpyChk;
 
   // String and memory library call optimizations.
@@ -566,6 +797,11 @@
   StrCmpOpt StrCmp;
   StrNCmpOpt StrNCmp;
   StrCpyOpt StrCpy;
+  StpCpyOpt StpCpy;
+  StrNCpyOpt StrNCpy;
+  StrLenOpt StrLen;
+  StrPBrkOpt StrPBrk;
+  StrToOpt StrTo;
 
   void initOptimizations();
 public:
@@ -583,7 +819,7 @@
   Optimizations["__memmove_chk"] = &MemMoveChk;
   Optimizations["__memset_chk"] = &MemSetChk;
   Optimizations["__strcpy_chk"] = &StrCpyChk;
-  Optimizations["__stpcpy_chk"] = &StrCpyChk;
+  Optimizations["__stpcpy_chk"] = &StpCpyChk;
   Optimizations["__strncpy_chk"] = &StrNCpyChk;
   Optimizations["__stpncpy_chk"] = &StrNCpyChk;
 
@@ -595,6 +831,17 @@
   Optimizations["strcmp"] = &StrCmp;
   Optimizations["strncmp"] = &StrNCmp;
   Optimizations["strcpy"] = &StrCpy;
+  Optimizations["stpcpy"] = &StpCpy;
+  Optimizations["strncpy"] = &StrNCpy;
+  Optimizations["strlen"] = &StrLen;
+  Optimizations["strpbrk"] = &StrPBrk;
+  Optimizations["strtol"] = &StrTo;
+  Optimizations["strtod"] = &StrTo;
+  Optimizations["strtof"] = &StrTo;
+  Optimizations["strtoul"] = &StrTo;
+  Optimizations["strtoll"] = &StrTo;
+  Optimizations["strtold"] = &StrTo;
+  Optimizations["strtoull"] = &StrTo;
 }
 
 Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {

Modified: llvm/branches/R600/lib/Transforms/Vectorize/BBVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Vectorize/BBVectorize.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Vectorize/BBVectorize.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Vectorize/BBVectorize.cpp Tue Nov 13 09:21:47 2012
@@ -34,6 +34,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -42,16 +43,26 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/DataLayout.h"
+#include "llvm/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Vectorize.h"
 #include <algorithm>
 #include <map>
 using namespace llvm;
 
+static cl::opt<bool>
+IgnoreTargetInfo("bb-vectorize-ignore-target-info",  cl::init(false),
+  cl::Hidden, cl::desc("Ignore target information"));
+
 static cl::opt<unsigned>
 ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
   cl::desc("The required chain depth for vectorization"));
 
+static cl::opt<bool>
+UseChainDepthWithTI("bb-vectorize-use-chain-depth",  cl::init(false),
+  cl::Hidden, cl::desc("Use the chain depth requirement with"
+                       " target information"));
+
 static cl::opt<unsigned>
 SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
   cl::desc("The maximum search distance for instruction pairs"));
@@ -93,8 +104,9 @@
 NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
   cl::desc("Don't try to vectorize floating-point values"));
 
+// FIXME: This should default to false once pointer vector support works.
 static cl::opt<bool>
-NoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden,
+NoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden,
   cl::desc("Don't try to vectorize pointer values"));
 
 static cl::opt<bool>
@@ -159,6 +171,12 @@
   cl::init(false), cl::Hidden,
   cl::desc("When debugging is enabled, output information on the"
            " cycle-checking process"));
+
+static cl::opt<bool>
+PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair",
+  cl::init(false), cl::Hidden,
+  cl::desc("When debugging is enabled, dump the basic block after"
+           " every pair is fused"));
 #endif
 
 STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
@@ -177,13 +195,19 @@
     BBVectorize(Pass *P, const VectorizeConfig &C)
       : BasicBlockPass(ID), Config(C) {
       AA = &P->getAnalysis<AliasAnalysis>();
+      DT = &P->getAnalysis<DominatorTree>();
       SE = &P->getAnalysis<ScalarEvolution>();
       TD = P->getAnalysisIfAvailable<DataLayout>();
+      TTI = IgnoreTargetInfo ? 0 :
+        P->getAnalysisIfAvailable<TargetTransformInfo>();
+      VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
     }
 
     typedef std::pair<Value *, Value *> ValuePair;
+    typedef std::pair<ValuePair, int> ValuePairWithCost;
     typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
     typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
+    typedef std::pair<VPPair, unsigned> VPPairWithType;
     typedef std::pair<std::multimap<Value *, Value *>::iterator,
               std::multimap<Value *, Value *>::iterator> VPIteratorPair;
     typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
@@ -191,8 +215,11 @@
                 VPPIteratorPair;
 
     AliasAnalysis *AA;
+    DominatorTree *DT;
     ScalarEvolution *SE;
     DataLayout *TD;
+    TargetTransformInfo *TTI;
+    const VectorTargetTransformInfo *VTTI;
 
     // FIXME: const correct?
 
@@ -201,11 +228,23 @@
     bool getCandidatePairs(BasicBlock &BB,
                        BasicBlock::iterator &Start,
                        std::multimap<Value *, Value *> &CandidatePairs,
+                       DenseSet<ValuePair> &FixedOrderPairs,
+                       DenseMap<ValuePair, int> &CandidatePairCostSavings,
                        std::vector<Value *> &PairableInsts, bool NonPow2Len);
 
+    // FIXME: The current implementation does not account for pairs that
+    // are connected in multiple ways. For example:
+    //   C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap)
+    enum PairConnectionType {
+      PairConnectionDirect,
+      PairConnectionSwap,
+      PairConnectionSplat
+    };
+
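In scalar terms, the three connection kinds classify how the results of one
chosen pair feed the members of another. A made-up scalar sketch (names are
purely illustrative; the pass classifies edges between arbitrary instruction
pairs):

    #include <cstdio>

    // Producer pair: <A1, A2>.  Consumer pair: <C1, C2>.
    static float direct(float A1, float A2, float B1, float B2) {
      float C1 = A1 + B1;            // C1 reads A1
      float C2 = A2 + B2;            // C2 reads A2   -> PairConnectionDirect
      return C1 * C2;
    }

    static float swapped(float A1, float A2, float B1, float B2) {
      float C1 = A2 + B1;            // C1 reads A2
      float C2 = A1 + B2;            // C2 reads A1   -> PairConnectionSwap
      return C1 * C2;
    }

    static float splat(float A1, float A2, float B1, float B2) {
      float C1 = A1 + B1;            // both consumers read the same
      float C2 = A1 + B2;            // element A1    -> PairConnectionSplat
      return C1 * C2 + A2;
    }

    int main() {
      std::printf("%g %g %g\n", direct(1, 2, 3, 4), swapped(1, 2, 3, 4),
                  splat(1, 2, 3, 4));
      return 0;
    }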
     void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
                        std::vector<Value *> &PairableInsts,
-                       std::multimap<ValuePair, ValuePair> &ConnectedPairs);
+                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                       DenseMap<VPPair, unsigned> &PairConnectionTypes);
 
     void buildDepMap(BasicBlock &BB,
                        std::multimap<Value *, Value *> &CandidatePairs,
@@ -213,19 +252,29 @@
                        DenseSet<ValuePair> &PairableInstUsers);
 
     void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
+                        DenseMap<ValuePair, int> &CandidatePairCostSavings,
                         std::vector<Value *> &PairableInsts,
+                        DenseSet<ValuePair> &FixedOrderPairs,
+                        DenseMap<VPPair, unsigned> &PairConnectionTypes,
                         std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                        std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
                         DenseSet<ValuePair> &PairableInstUsers,
                         DenseMap<Value *, Value *>& ChosenPairs);
 
     void fuseChosenPairs(BasicBlock &BB,
                      std::vector<Value *> &PairableInsts,
-                     DenseMap<Value *, Value *>& ChosenPairs);
+                     DenseMap<Value *, Value *>& ChosenPairs,
+                     DenseSet<ValuePair> &FixedOrderPairs,
+                     DenseMap<VPPair, unsigned> &PairConnectionTypes,
+                     std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                     std::multimap<ValuePair, ValuePair> &ConnectedPairDeps);
+
 
     bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
 
     bool areInstsCompatible(Instruction *I, Instruction *J,
-                       bool IsSimpleLoadStore, bool NonPow2Len);
+                       bool IsSimpleLoadStore, bool NonPow2Len,
+                       int &CostSavings, int &FixedOrder);
 
     bool trackUsesOfI(DenseSet<Value *> &Users,
                       AliasSetTracker &WriteSet, Instruction *I,
@@ -236,6 +285,7 @@
                       std::multimap<Value *, Value *> &CandidatePairs,
                       std::vector<Value *> &PairableInsts,
                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
                       ValuePair P);
 
     bool pairsConflict(ValuePair P, ValuePair Q,
@@ -267,17 +317,21 @@
 
     void findBestTreeFor(
                       std::multimap<Value *, Value *> &CandidatePairs,
+                      DenseMap<ValuePair, int> &CandidatePairCostSavings,
                       std::vector<Value *> &PairableInsts,
+                      DenseSet<ValuePair> &FixedOrderPairs,
+                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                      std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
                       DenseSet<ValuePair> &PairableInstUsers,
                       std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
                       DenseMap<Value *, Value *> &ChosenPairs,
                       DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
-                      size_t &BestEffSize, VPIteratorPair ChoiceRange,
+                      int &BestEffSize, VPIteratorPair ChoiceRange,
                       bool UseCycleCheck);
 
     Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
-                     Instruction *J, unsigned o, bool FlipMemInputs);
+                     Instruction *J, unsigned o);
 
     void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
                      unsigned MaskOffset, unsigned NumInElem,
@@ -289,20 +343,20 @@
 
     bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J,
                        unsigned o, Value *&LOp, unsigned numElemL,
-                       Type *ArgTypeL, Type *ArgTypeR,
+                       Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ,
                        unsigned IdxOff = 0);
 
     Value *getReplacementInput(LLVMContext& Context, Instruction *I,
-                     Instruction *J, unsigned o, bool FlipMemInputs);
+                     Instruction *J, unsigned o, bool IBeforeJ);
 
     void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
                      Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
-                     bool FlipMemInputs);
+                     bool IBeforeJ);
 
     void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
                      Instruction *J, Instruction *K,
                      Instruction *&InsertionPt, Instruction *&K1,
-                     Instruction *&K2, bool FlipMemInputs);
+                     Instruction *&K2);
 
     void collectPairLoadMoveSet(BasicBlock &BB,
                      DenseMap<Value *, Value *> &ChosenPairs,
@@ -314,10 +368,6 @@
                      DenseMap<Value *, Value *> &ChosenPairs,
                      std::multimap<Value *, Value *> &LoadMoveSet);
 
-    void collectPtrInfo(std::vector<Value *> &PairableInsts,
-                        DenseMap<Value *, Value *> &ChosenPairs,
-                        DenseSet<Value *> &LowPtrInsts);
-
     bool canMoveUsesOfIAfterJ(BasicBlock &BB,
                      std::multimap<Value *, Value *> &LoadMoveSet,
                      Instruction *I, Instruction *J);
@@ -330,13 +380,22 @@
     void combineMetadata(Instruction *K, const Instruction *J);
 
     bool vectorizeBB(BasicBlock &BB) {
+      if (!DT->isReachableFromEntry(&BB)) {
+        DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() <<
+              " in " << BB.getParent()->getName() << "\n");
+        return false;
+      }
+
+      DEBUG(if (VTTI) dbgs() << "BBV: using target information\n");
+
       bool changed = false;
       // Iterate a sufficient number of times to merge types of size 1 bit,
       // then 2 bits, then 4, etc. up to half of the target vector width of the
       // target vector register.
       unsigned n = 1;
       for (unsigned v = 2;
-           v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
+           (VTTI || v <= Config.VectorBits) &&
+           (!Config.MaxIter || n <= Config.MaxIter);
            v *= 2, ++n) {
         DEBUG(dbgs() << "BBV: fusing loop #" << n <<
               " for " << BB.getName() << " in " <<
@@ -363,8 +422,12 @@
 
     virtual bool runOnBasicBlock(BasicBlock &BB) {
       AA = &getAnalysis<AliasAnalysis>();
+      DT = &getAnalysis<DominatorTree>();
       SE = &getAnalysis<ScalarEvolution>();
       TD = getAnalysisIfAvailable<DataLayout>();
+      TTI = IgnoreTargetInfo ? 0 :
+        getAnalysisIfAvailable<TargetTransformInfo>();
+      VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
 
       return vectorizeBB(BB);
     }
@@ -372,8 +435,10 @@
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       BasicBlockPass::getAnalysisUsage(AU);
       AU.addRequired<AliasAnalysis>();
+      AU.addRequired<DominatorTree>();
       AU.addRequired<ScalarEvolution>();
       AU.addPreserved<AliasAnalysis>();
+      AU.addPreserved<DominatorTree>();
       AU.addPreserved<ScalarEvolution>();
       AU.setPreservesCFG();
     }
@@ -415,6 +480,10 @@
         T2 = cast<CastInst>(I)->getSrcTy();
       else
         T2 = T1;
+
+      if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+        T2 = SI->getCondition()->getType();
+      }
     }
 
     // Returns the weight associated with the provided value. A chain of
@@ -446,6 +515,62 @@
       return 1;
     }
 
+    // Returns the cost of the provided instruction using VTTI.
+    // This does not handle loads and stores.
+    unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
+      switch (Opcode) {
+      default: break;
+      case Instruction::GetElementPtr:
+        // We mark this instruction as zero-cost because scalar GEPs are usually
+        // lowered to the instruction addressing mode. At the moment we don't
+        // generate vector GEPs.
+        return 0;
+      case Instruction::Br:
+        return VTTI->getCFInstrCost(Opcode);
+      case Instruction::PHI:
+        return 0;
+      case Instruction::Add:
+      case Instruction::FAdd:
+      case Instruction::Sub:
+      case Instruction::FSub:
+      case Instruction::Mul:
+      case Instruction::FMul:
+      case Instruction::UDiv:
+      case Instruction::SDiv:
+      case Instruction::FDiv:
+      case Instruction::URem:
+      case Instruction::SRem:
+      case Instruction::FRem:
+      case Instruction::Shl:
+      case Instruction::LShr:
+      case Instruction::AShr:
+      case Instruction::And:
+      case Instruction::Or:
+      case Instruction::Xor:
+        return VTTI->getArithmeticInstrCost(Opcode, T1);
+      case Instruction::Select:
+      case Instruction::ICmp:
+      case Instruction::FCmp:
+        return VTTI->getCmpSelInstrCost(Opcode, T1, T2);
+      case Instruction::ZExt:
+      case Instruction::SExt:
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::FPExt:
+      case Instruction::PtrToInt:
+      case Instruction::IntToPtr:
+      case Instruction::SIToFP:
+      case Instruction::UIToFP:
+      case Instruction::Trunc:
+      case Instruction::FPTrunc:
+      case Instruction::BitCast:
+      case Instruction::ShuffleVector:
+        return VTTI->getCastInstrCost(Opcode, T1, T2);
+      }
+
+      return 1;
+    }
+
     // This determines the relative offset of two loads or stores, returning
     // true if the offset could be determined to be some constant value.
     // For example, if OffsetInElmts == 1, then J accesses the memory directly
@@ -453,20 +578,30 @@
     // directly after J.
     bool getPairPtrInfo(Instruction *I, Instruction *J,
         Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
-        int64_t &OffsetInElmts) {
+        unsigned &IAddressSpace, unsigned &JAddressSpace,
+        int64_t &OffsetInElmts, bool ComputeOffset = true) {
       OffsetInElmts = 0;
-      if (isa<LoadInst>(I)) {
-        IPtr = cast<LoadInst>(I)->getPointerOperand();
-        JPtr = cast<LoadInst>(J)->getPointerOperand();
-        IAlignment = cast<LoadInst>(I)->getAlignment();
-        JAlignment = cast<LoadInst>(J)->getAlignment();
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        LoadInst *LJ = cast<LoadInst>(J);
+        IPtr = LI->getPointerOperand();
+        JPtr = LJ->getPointerOperand();
+        IAlignment = LI->getAlignment();
+        JAlignment = LJ->getAlignment();
+        IAddressSpace = LI->getPointerAddressSpace();
+        JAddressSpace = LJ->getPointerAddressSpace();
       } else {
-        IPtr = cast<StoreInst>(I)->getPointerOperand();
-        JPtr = cast<StoreInst>(J)->getPointerOperand();
-        IAlignment = cast<StoreInst>(I)->getAlignment();
-        JAlignment = cast<StoreInst>(J)->getAlignment();
+        StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
+        IPtr = SI->getPointerOperand();
+        JPtr = SJ->getPointerOperand();
+        IAlignment = SI->getAlignment();
+        JAlignment = SJ->getAlignment();
+        IAddressSpace = SI->getPointerAddressSpace();
+        JAddressSpace = SJ->getPointerAddressSpace();
       }
 
+      if (!ComputeOffset)
+        return true;
+
       const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
       const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
 
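A concrete picture of the offset computed here: for two loads whose pointers
differ by exactly one element, OffsetInElmts is 1 and the pair can be laid out
as a single wide access based at the lower pointer. An illustrative sketch with
made-up names (ScalarEvolution performs the real computation on the SCEVs of
the two pointers):

    #include <cassert>
    #include <cstddef>

    int main() {
      float A[4] = {0, 1, 2, 3};
      float *IPtr = &A[2];                 // pointer operand of load I
      float *JPtr = &A[3];                 // pointer operand of load J

      std::ptrdiff_t OffsetInElmts = JPtr - IPtr;
      assert(OffsetInElmts == 1);          // J reads the element directly after
                                           // I, so <I, J> can become one vector
                                           // load based at IPtr.
      return 0;
    }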
@@ -546,11 +681,18 @@
 
     std::vector<Value *> AllPairableInsts;
     DenseMap<Value *, Value *> AllChosenPairs;
+    DenseSet<ValuePair> AllFixedOrderPairs;
+    DenseMap<VPPair, unsigned> AllPairConnectionTypes;
+    std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps;
 
     do {
       std::vector<Value *> PairableInsts;
       std::multimap<Value *, Value *> CandidatePairs;
+      DenseSet<ValuePair> FixedOrderPairs;
+      DenseMap<ValuePair, int> CandidatePairCostSavings;
       ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
+                                         FixedOrderPairs,
+                                         CandidatePairCostSavings,
                                          PairableInsts, NonPow2Len);
       if (PairableInsts.empty()) continue;
 
@@ -563,10 +705,18 @@
       // Note that it only matters that both members of the second pair use some
       // element of the first pair (to allow for splatting).
 
-      std::multimap<ValuePair, ValuePair> ConnectedPairs;
-      computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs);
+      std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps;
+      DenseMap<VPPair, unsigned> PairConnectionTypes;
+      computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs,
+                            PairConnectionTypes);
       if (ConnectedPairs.empty()) continue;
 
+      for (std::multimap<ValuePair, ValuePair>::iterator
+           I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
+           I != IE; ++I) {
+        ConnectedPairDeps.insert(VPPair(I->second, I->first));
+      }
+
       // Build the pairable-instruction dependency map
       DenseSet<ValuePair> PairableInstUsers;
       buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
@@ -578,13 +728,48 @@
       // variables.
 
       DenseMap<Value *, Value *> ChosenPairs;
-      choosePairs(CandidatePairs, PairableInsts, ConnectedPairs,
+      choosePairs(CandidatePairs, CandidatePairCostSavings,
+        PairableInsts, FixedOrderPairs, PairConnectionTypes,
+        ConnectedPairs, ConnectedPairDeps,
         PairableInstUsers, ChosenPairs);
 
       if (ChosenPairs.empty()) continue;
       AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
                               PairableInsts.end());
       AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
+
+      // Only for the chosen pairs, propagate information on fixed-order pairs,
+      // pair connections, and their types to the data structures used by the
+      // pair fusion procedures.
+      for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(),
+           IE = ChosenPairs.end(); I != IE; ++I) {
+        if (FixedOrderPairs.count(*I))
+          AllFixedOrderPairs.insert(*I);
+        else if (FixedOrderPairs.count(ValuePair(I->second, I->first)))
+          AllFixedOrderPairs.insert(ValuePair(I->second, I->first));
+
+        for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin();
+             J != IE; ++J) {
+          DenseMap<VPPair, unsigned>::iterator K =
+            PairConnectionTypes.find(VPPair(*I, *J));
+          if (K != PairConnectionTypes.end()) {
+            AllPairConnectionTypes.insert(*K);
+          } else {
+            K = PairConnectionTypes.find(VPPair(*J, *I));
+            if (K != PairConnectionTypes.end())
+              AllPairConnectionTypes.insert(*K);
+          }
+        }
+      }
+
+      for (std::multimap<ValuePair, ValuePair>::iterator
+           I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
+           I != IE; ++I) {
+        if (AllPairConnectionTypes.count(*I)) {
+          AllConnectedPairs.insert(*I);
+          AllConnectedPairDeps.insert(VPPair(I->second, I->first));
+        }
+      }
     } while (ShouldContinue);
 
     if (AllChosenPairs.empty()) return false;
@@ -597,7 +782,9 @@
     // replaced with a vector_extract on the result.  Subsequent optimization
     // passes should coalesce the build/extract combinations.
 
-    fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs);
+    fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
+                    AllPairConnectionTypes,
+                    AllConnectedPairs, AllConnectedPairDeps);
 
     // It is important to cleanup here so that future iterations of this
     // function have less work to do.
@@ -667,15 +854,22 @@
         !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
       return false;
 
-    if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) {
+    if (T1->getScalarSizeInBits() == 1) {
       if (!Config.VectorizeBools)
         return false;
     } else {
-      if (!Config.VectorizeInts
-          && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+      if (!Config.VectorizeInts && T1->isIntOrIntVectorTy())
         return false;
     }
-  
+
+    if (T2->getScalarSizeInBits() == 1) {
+      if (!Config.VectorizeBools)
+        return false;
+    } else {
+      if (!Config.VectorizeInts && T2->isIntOrIntVectorTy())
+        return false;
+    }
+
     if (!Config.VectorizeFloats
         && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
       return false;
@@ -691,8 +885,8 @@
          T2->getScalarType()->isPointerTy()))
       return false;
 
-    if (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
-        T2->getPrimitiveSizeInBits() >= Config.VectorBits)
+    if (!VTTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
+                  T2->getPrimitiveSizeInBits() >= Config.VectorBits))
       return false;
 
     return true;
@@ -703,10 +897,14 @@
   // that I has already been determined to be vectorizable and that J is not
   // in the use tree of I.
   bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
-                       bool IsSimpleLoadStore, bool NonPow2Len) {
+                       bool IsSimpleLoadStore, bool NonPow2Len,
+                       int &CostSavings, int &FixedOrder) {
     DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
                      " <-> " << *J << "\n");
 
+    CostSavings = 0;
+    FixedOrder = 0;
+
     // Loads and stores can be merged if they have different alignments,
     // but are otherwise the same.
     if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
@@ -719,38 +917,83 @@
     unsigned MaxTypeBits = std::max(
       IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
       IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
-    if (MaxTypeBits > Config.VectorBits)
+    if (!VTTI && MaxTypeBits > Config.VectorBits)
       return false;
 
     // FIXME: handle addsub-type operations!
 
     if (IsSimpleLoadStore) {
       Value *IPtr, *JPtr;
-      unsigned IAlignment, JAlignment;
+      unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
       int64_t OffsetInElmts = 0;
       if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+            IAddressSpace, JAddressSpace,
             OffsetInElmts) && abs64(OffsetInElmts) == 1) {
-        if (Config.AlignedOnly) {
-          Type *aTypeI = isa<StoreInst>(I) ?
-            cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
-          Type *aTypeJ = isa<StoreInst>(J) ?
-            cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
+        FixedOrder = (int) OffsetInElmts;
+        unsigned BottomAlignment = IAlignment;
+        if (OffsetInElmts < 0) BottomAlignment = JAlignment;
+
+        Type *aTypeI = isa<StoreInst>(I) ?
+          cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
+        Type *aTypeJ = isa<StoreInst>(J) ?
+          cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
+        Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
 
+        if (Config.AlignedOnly) {
           // An aligned load or store is possible only if the instruction
           // with the lower offset has an alignment suitable for the
           // vector type.
 
-          unsigned BottomAlignment = IAlignment;
-          if (OffsetInElmts < 0) BottomAlignment = JAlignment;
-
-          Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
           unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
           if (BottomAlignment < VecAlignment)
             return false;
         }
+
+        if (VTTI) {
+          unsigned ICost = VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
+                                                 IAlignment, IAddressSpace);
+          unsigned JCost = VTTI->getMemoryOpCost(J->getOpcode(), J->getType(),
+                                                 JAlignment, JAddressSpace);
+          unsigned VCost = VTTI->getMemoryOpCost(I->getOpcode(), VType,
+                                                 BottomAlignment,
+                                                 IAddressSpace);
+          if (VCost > ICost + JCost)
+            return false;
+
+          // We don't want to fuse to a type that will be split, even
+          // if the two input types will also be split and there is no other
+          // associated cost.
+          unsigned VParts = VTTI->getNumberOfParts(VType);
+          if (VParts > 1)
+            return false;
+          else if (!VParts && VCost == ICost + JCost)
+            return false;
+
+          CostSavings = ICost + JCost - VCost;
+        }
       } else {
         return false;
       }
+    } else if (VTTI) {
+      unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2);
+      unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
+      Type *VT1 = getVecTypeForPair(IT1, JT1),
+           *VT2 = getVecTypeForPair(IT2, JT2);
+      unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
+
+      if (VCost > ICost + JCost)
+        return false;
+
+      // We don't want to fuse to a type that will be split, even
+      // if the two input types will also be split and there is no other
+      // associated cost.
+      unsigned VParts = VTTI->getNumberOfParts(VT1);
+      if (VParts > 1)
+        return false;
+      else if (!VParts && VCost == ICost + JCost)
+        return false;
+
+      CostSavings = ICost + JCost - VCost;
     }
 
     // The powi intrinsic is special because only the first argument is
@@ -833,6 +1076,8 @@
   bool BBVectorize::getCandidatePairs(BasicBlock &BB,
                        BasicBlock::iterator &Start,
                        std::multimap<Value *, Value *> &CandidatePairs,
+                       DenseSet<ValuePair> &FixedOrderPairs,
+                       DenseMap<ValuePair, int> &CandidatePairCostSavings,
                        std::vector<Value *> &PairableInsts, bool NonPow2Len) {
     BasicBlock::iterator E = BB.end();
     if (Start == E) return false;
@@ -869,7 +1114,9 @@
 
         // J does not use I, and comes before the first use of I, so it can be
         // merged with I if the instructions are compatible.
-        if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue;
+        int CostSavings, FixedOrder;
+        if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
+            CostSavings, FixedOrder)) continue;
 
         // J is a candidate for merging with I.
         if (!PairableInsts.size() ||
@@ -878,6 +1125,14 @@
         }
 
         CandidatePairs.insert(ValuePair(I, J));
+        if (VTTI)
+          CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
+                                                            CostSavings));
+
+        if (FixedOrder == 1)
+          FixedOrderPairs.insert(ValuePair(I, J));
+        else if (FixedOrder == -1)
+          FixedOrderPairs.insert(ValuePair(J, I));
 
         // The next call to this function must start after the last instruction
         // selected during this invocation.
@@ -887,7 +1142,8 @@
         }
 
         DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
-                     << *I << " <-> " << *J << "\n");
+                     << *I << " <-> " << *J << " (cost savings: " <<
+                     CostSavings << ")\n");
 
         // If we have already found too many pairs, break here and this function
         // will be called again starting after the last instruction selected
@@ -915,6 +1171,7 @@
                       std::multimap<Value *, Value *> &CandidatePairs,
                       std::vector<Value *> &PairableInsts,
                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
                       ValuePair P) {
     StoreInst *SI, *SJ;
 
@@ -946,12 +1203,18 @@
         VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
 
         // Look for <I, J>:
-        if (isSecondInIteratorPair<Value*>(*J, IPairRange))
-          ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+        if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+          VPPair VP(P, ValuePair(*I, *J));
+          ConnectedPairs.insert(VP);
+          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
+        }
 
         // Look for <J, I>:
-        if (isSecondInIteratorPair<Value*>(*I, JPairRange))
-          ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
+        if (isSecondInIteratorPair<Value*>(*I, JPairRange)) {
+          VPPair VP(P, ValuePair(*J, *I));
+          ConnectedPairs.insert(VP);
+          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
+        }
       }
 
       if (Config.SplatBreaksChain) continue;
@@ -962,8 +1225,11 @@
             P.first == SJ->getPointerOperand())
           continue;
 
-        if (isSecondInIteratorPair<Value*>(*J, IPairRange))
-          ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+        if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+          VPPair VP(P, ValuePair(*I, *J));
+          ConnectedPairs.insert(VP);
+          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
+        }
       }
     }
 
@@ -985,8 +1251,11 @@
             P.second == SJ->getPointerOperand())
           continue;
 
-        if (isSecondInIteratorPair<Value*>(*J, IPairRange))
-          ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+        if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+          VPPair VP(P, ValuePair(*I, *J));
+          ConnectedPairs.insert(VP);
+          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
+        }
       }
     }
   }
@@ -997,7 +1266,8 @@
   void BBVectorize::computeConnectedPairs(
                       std::multimap<Value *, Value *> &CandidatePairs,
                       std::vector<Value *> &PairableInsts,
-                      std::multimap<ValuePair, ValuePair> &ConnectedPairs) {
+                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                      DenseMap<VPPair, unsigned> &PairConnectionTypes) {
 
     for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
          PE = PairableInsts.end(); PI != PE; ++PI) {
@@ -1006,7 +1276,7 @@
       for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
            P != choiceRange.second; ++P)
         computePairsConnectedTo(CandidatePairs, PairableInsts,
-                                ConnectedPairs, *P);
+                                ConnectedPairs, PairConnectionTypes, *P);
     }
 
     DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size()
@@ -1341,13 +1611,17 @@
   // pairs, given the choice of root pairs as an iterator range.
   void BBVectorize::findBestTreeFor(
                       std::multimap<Value *, Value *> &CandidatePairs,
+                      DenseMap<ValuePair, int> &CandidatePairCostSavings,
                       std::vector<Value *> &PairableInsts,
+                      DenseSet<ValuePair> &FixedOrderPairs,
+                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                      std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
                       DenseSet<ValuePair> &PairableInstUsers,
                       std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
                       DenseMap<Value *, Value *> &ChosenPairs,
                       DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
-                      size_t &BestEffSize, VPIteratorPair ChoiceRange,
+                      int &BestEffSize, VPIteratorPair ChoiceRange,
                       bool UseCycleCheck) {
     for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
          J != ChoiceRange.second; ++J) {
@@ -1397,17 +1671,243 @@
                    PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree,
                    PrunedTree, *J, UseCycleCheck);
 
-      size_t EffSize = 0;
-      for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
-           E = PrunedTree.end(); S != E; ++S)
-        EffSize += getDepthFactor(S->first);
+      int EffSize = 0;
+      if (VTTI) {
+        DenseSet<Value *> PrunedTreeInstrs;
+        for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+             E = PrunedTree.end(); S != E; ++S) {
+          PrunedTreeInstrs.insert(S->first);
+          PrunedTreeInstrs.insert(S->second);
+        }
+
+        // The set of pairs that have already contributed to the total cost.
+        DenseSet<ValuePair> IncomingPairs;
+
+        // The node weights represent the cost savings associated with
+        // fusing the pair of instructions.
+        for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+             E = PrunedTree.end(); S != E; ++S) {
+          bool FlipOrder = false;
+
+          if (getDepthFactor(S->first)) {
+            int ESContrib = CandidatePairCostSavings.find(*S)->second;
+            DEBUG(if (DebugPairSelection) dbgs() << "\tweight {"
+                   << *S->first << " <-> " << *S->second << "} = " <<
+                   ESContrib << "\n");
+            EffSize += ESContrib;
+          }
+
+          // The edge weights contribute in a negative sense: they represent
+          // the cost of shuffles.
+          VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S);
+          if (IP.first != ConnectedPairDeps.end()) {
+            unsigned NumDepsDirect = 0, NumDepsSwap = 0;
+            for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+                 Q != IP.second; ++Q) {
+              if (!PrunedTree.count(Q->second))
+                continue;
+              DenseMap<VPPair, unsigned>::iterator R =
+                PairConnectionTypes.find(VPPair(Q->second, Q->first));
+              assert(R != PairConnectionTypes.end() &&
+                     "Cannot find pair connection type");
+              if (R->second == PairConnectionDirect)
+                ++NumDepsDirect;
+              else if (R->second == PairConnectionSwap)
+                ++NumDepsSwap;
+            }
+
+            // If there are more swaps than direct connections, then
+            // the pair order will be flipped during fusion. So the real
+            // number of swaps is the minimum number.
+            FlipOrder = !FixedOrderPairs.count(*S) &&
+              ((NumDepsSwap > NumDepsDirect) ||
+                FixedOrderPairs.count(ValuePair(S->second, S->first)));
+
+            for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+                 Q != IP.second; ++Q) {
+              if (!PrunedTree.count(Q->second))
+                continue;
+              DenseMap<VPPair, unsigned>::iterator R =
+                PairConnectionTypes.find(VPPair(Q->second, Q->first));
+              assert(R != PairConnectionTypes.end() &&
+                     "Cannot find pair connection type");
+              Type *Ty1 = Q->second.first->getType(),
+                   *Ty2 = Q->second.second->getType();
+              Type *VTy = getVecTypeForPair(Ty1, Ty2);
+              if ((R->second == PairConnectionDirect && FlipOrder) ||
+                  (R->second == PairConnectionSwap && !FlipOrder)  ||
+                  R->second == PairConnectionSplat) {
+                int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+                                                   VTy, VTy);
+                DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+                  *Q->second.first << " <-> " << *Q->second.second <<
+                    "} -> {" <<
+                  *S->first << " <-> " << *S->second << "} = " <<
+                   ESContrib << "\n");
+                EffSize -= ESContrib;
+              }
+            }
+          }
+
+          // Compute the cost of outgoing edges. We assume that edges outgoing
+          // to shuffles, inserts or extracts can be merged, and so contribute
+          // no additional cost.
+          if (!S->first->getType()->isVoidTy()) {
+            Type *Ty1 = S->first->getType(),
+                 *Ty2 = S->second->getType();
+            Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+            bool NeedsExtraction = false;
+            for (Value::use_iterator I = S->first->use_begin(),
+                 IE = S->first->use_end(); I != IE; ++I) {
+              if (isa<ShuffleVectorInst>(*I) ||
+                  isa<InsertElementInst>(*I) ||
+                  isa<ExtractElementInst>(*I))
+                continue;
+              if (PrunedTreeInstrs.count(*I))
+                continue;
+              NeedsExtraction = true;
+              break;
+            }
+
+            if (NeedsExtraction) {
+              int ESContrib;
+              if (Ty1->isVectorTy())
+                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+                                               Ty1, VTy);
+              else
+                ESContrib = (int) VTTI->getVectorInstrCost(
+                                    Instruction::ExtractElement, VTy, 0);
+
+              DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+                *S->first << "} = " << ESContrib << "\n");
+              EffSize -= ESContrib;
+            }
+
+            NeedsExtraction = false;
+            for (Value::use_iterator I = S->second->use_begin(),
+                 IE = S->second->use_end(); I != IE; ++I) {
+              if (isa<ShuffleVectorInst>(*I) ||
+                  isa<InsertElementInst>(*I) ||
+                  isa<ExtractElementInst>(*I))
+                continue;
+              if (PrunedTreeInstrs.count(*I))
+                continue;
+              NeedsExtraction = true;
+              break;
+            }
+
+            if (NeedsExtraction) {
+              int ESContrib;
+              if (Ty2->isVectorTy())
+                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+                                               Ty2, VTy);
+              else
+                ESContrib = (int) VTTI->getVectorInstrCost(
+                                    Instruction::ExtractElement, VTy, 1);
+              DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+                *S->second << "} = " << ESContrib << "\n");
+              EffSize -= ESContrib;
+            }
+          }
+
+          // Compute the cost of incoming edges.
+          if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
+            Instruction *S1 = cast<Instruction>(S->first),
+                        *S2 = cast<Instruction>(S->second);
+            for (unsigned o = 0; o < S1->getNumOperands(); ++o) {
+              Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o);
+
+              // Combining constants into vector constants (or small vector
+              // constants into larger ones) is assumed free.
+              if (isa<Constant>(O1) && isa<Constant>(O2))
+                continue;
+
+              if (FlipOrder)
+                std::swap(O1, O2);
+
+              ValuePair VP  = ValuePair(O1, O2);
+              ValuePair VPR = ValuePair(O2, O1);
+
+              // Internal edges are not handled here.
+              if (PrunedTree.count(VP) || PrunedTree.count(VPR))
+                continue;
+
+              Type *Ty1 = O1->getType(),
+                   *Ty2 = O2->getType();
+              Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+              // Combining vector operations of the same type is also assumed
+              // folded with other operations.
+              if (Ty1 == Ty2 &&
+                  (isa<ShuffleVectorInst>(O1) ||
+                   isa<InsertElementInst>(O1) ||
+                   isa<ExtractElementInst>(O1)) &&
+                  (isa<ShuffleVectorInst>(O2) ||
+                   isa<InsertElementInst>(O2) ||
+                   isa<ExtractElementInst>(O2)))
+                continue;
+
+              int ESContrib;
+              // This pair has already been formed.
+              if (IncomingPairs.count(VP)) {
+                continue;
+              } else if (IncomingPairs.count(VPR)) {
+                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+                                               VTy, VTy);
+              } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
+                ESContrib = (int) VTTI->getVectorInstrCost(
+                                    Instruction::InsertElement, VTy, 0);
+                ESContrib += (int) VTTI->getVectorInstrCost(
+                                     Instruction::InsertElement, VTy, 1);
+              } else if (!Ty1->isVectorTy()) {
+                // O1 needs to be inserted into a vector of size O2, and then
+                // both need to be shuffled together.
+                ESContrib = (int) VTTI->getVectorInstrCost(
+                                    Instruction::InsertElement, Ty2, 0);
+                ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+                                                VTy, Ty2);
+              } else if (!Ty2->isVectorTy()) {
+                // O2 needs to be inserted into a vector of size O1, and then
+                // both need to be shuffled together.
+                ESContrib = (int) VTTI->getVectorInstrCost(
+                                    Instruction::InsertElement, Ty1, 0);
+                ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+                                                VTy, Ty1);
+              } else {
+                Type *TyBig = Ty1, *TySmall = Ty2;
+                if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements())
+                  std::swap(TyBig, TySmall);
+
+                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+                                               VTy, TyBig);
+                if (TyBig != TySmall)
+                  ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+                                                  TyBig, TySmall);
+              }
+
+              DEBUG(if (DebugPairSelection) dbgs() << "\tcost {"
+                     << *O1 << " <-> " << *O2 << "} = " <<
+                     ESContrib << "\n");
+              EffSize -= ESContrib;
+              IncomingPairs.insert(VP);
+            }
+          }
+        }
+      } else {
+        for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+             E = PrunedTree.end(); S != E; ++S)
+          EffSize += (int) getDepthFactor(S->first);
+      }
 
       DEBUG(if (DebugPairSelection)
              dbgs() << "BBV: found pruned Tree for pair {"
              << *J->first << " <-> " << *J->second << "} of depth " <<
              MaxDepth << " and size " << PrunedTree.size() <<
             " (effective size: " << EffSize << ")\n");
-      if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
+      if (((VTTI && !UseChainDepthWithTI) ||
+            MaxDepth >= Config.ReqChainDepth) &&
+          EffSize > 0 && EffSize > BestEffSize) {
         BestMaxDepth = MaxDepth;
         BestEffSize = EffSize;
         BestTree = PrunedTree;
@@ -1419,8 +1919,12 @@
   // that will be fused into vector instructions.
   void BBVectorize::choosePairs(
                       std::multimap<Value *, Value *> &CandidatePairs,
+                      DenseMap<ValuePair, int> &CandidatePairCostSavings,
                       std::vector<Value *> &PairableInsts,
+                      DenseSet<ValuePair> &FixedOrderPairs,
+                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                      std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
                       DenseSet<ValuePair> &PairableInstUsers,
                       DenseMap<Value *, Value *>& ChosenPairs) {
     bool UseCycleCheck =
@@ -1435,9 +1939,12 @@
       VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
 
       // The best pair to choose and its tree:
-      size_t BestMaxDepth = 0, BestEffSize = 0;
+      size_t BestMaxDepth = 0;
+      int BestEffSize = 0;
       DenseSet<ValuePair> BestTree;
-      findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+      findBestTreeFor(CandidatePairs, CandidatePairCostSavings,
+                      PairableInsts, FixedOrderPairs, PairConnectionTypes,
+                      ConnectedPairs, ConnectedPairDeps,
                       PairableInstUsers, PairableInstUserMap, ChosenPairs,
                       BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
                       UseCycleCheck);
@@ -1490,24 +1997,19 @@
   // Returns the value that is to be used as the pointer input to the vector
   // instruction that fuses I with J.
   Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
-                     Instruction *I, Instruction *J, unsigned o,
-                     bool FlipMemInputs) {
+                     Instruction *I, Instruction *J, unsigned o) {
     Value *IPtr, *JPtr;
-    unsigned IAlignment, JAlignment;
+    unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
     int64_t OffsetInElmts;
 
-    // Note: the analysis might fail here, that is why FlipMemInputs has
+    // Note: the analysis might fail here, that is why the pair order has
     // been precomputed (OffsetInElmts must be unused here).
     (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
-                          OffsetInElmts);
+                          IAddressSpace, JAddressSpace,
+                          OffsetInElmts, false);
 
     // The pointer value is taken to be the one with the lowest offset.
-    Value *VPtr;
-    if (!FlipMemInputs) {
-      VPtr = IPtr;
-    } else {
-      VPtr = JPtr;
-    }
+    Value *VPtr = IPtr;
 
     Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
     Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
@@ -1515,7 +2017,7 @@
     Type *VArgPtrType = PointerType::get(VArgType,
       cast<PointerType>(IPtr->getType())->getAddressSpace());
     return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
-                        /* insert before */ FlipMemInputs ? J : I);
+                        /* insert before */ I);
   }
 
   void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
@@ -1585,7 +2087,7 @@
                                   Instruction *J, unsigned o, Value *&LOp,
                                   unsigned numElemL,
                                   Type *ArgTypeL, Type *ArgTypeH,
-                                  unsigned IdxOff) {
+                                  bool IBeforeJ, unsigned IdxOff) {
     bool ExpandedIEChain = false;
     if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
       // If we have a pure insertelement chain, then this can be rewritten
@@ -1619,8 +2121,9 @@
           LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i],
                              ConstantInt::get(Type::getInt32Ty(Context),
                                               i + IdxOff),
-                             getReplacementName(I, true, o, i+1));
-          LIENext->insertBefore(J);
+                             getReplacementName(IBeforeJ ? I : J,
+                                                true, o, i+1));
+          LIENext->insertBefore(IBeforeJ ? J : I);
           LIEPrev = LIENext;
         }
 
@@ -1635,7 +2138,7 @@
   // Returns the value to be used as the specified operand of the vector
   // instruction that fuses I with J.
   Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
-                     Instruction *J, unsigned o, bool FlipMemInputs) {
+                     Instruction *J, unsigned o, bool IBeforeJ) {
     Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
     Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
 
@@ -1646,12 +2149,6 @@
 
     Instruction *L = I, *H = J;
     Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
-    if (FlipMemInputs) {
-      L = J;
-      H = I;
-      ArgTypeL = ArgTypeJ;
-      ArgTypeH = ArgTypeI;
-    }
 
     unsigned numElemL;
     if (ArgTypeL->isVectorTy())
@@ -1804,8 +2301,9 @@
           Instruction *S =
             new ShuffleVectorInst(I1, UndefValue::get(I1T),
                                   ConstantVector::get(Mask),
-                                  getReplacementName(I, true, o));
-          S->insertBefore(J);
+                                  getReplacementName(IBeforeJ ? I : J,
+                                                     true, o));
+          S->insertBefore(IBeforeJ ? J : I);
           return S;
         }
 
@@ -1826,8 +2324,9 @@
           Instruction *NewI1 =
             new ShuffleVectorInst(I1, UndefValue::get(I1T),
                                   ConstantVector::get(Mask),
-                                  getReplacementName(I, true, o, 1));
-          NewI1->insertBefore(J);
+                                  getReplacementName(IBeforeJ ? I : J,
+                                                     true, o, 1));
+          NewI1->insertBefore(IBeforeJ ? J : I);
           I1 = NewI1;
           I1T = I2T;
           I1Elem = I2Elem;
@@ -1842,8 +2341,9 @@
           Instruction *NewI2 =
             new ShuffleVectorInst(I2, UndefValue::get(I2T),
                                   ConstantVector::get(Mask),
-                                  getReplacementName(I, true, o, 1));
-          NewI2->insertBefore(J);
+                                  getReplacementName(IBeforeJ ? I : J,
+                                                     true, o, 1));
+          NewI2->insertBefore(IBeforeJ ? J : I);
           I2 = NewI2;
           I2T = I1T;
           I2Elem = I1Elem;
@@ -1863,8 +2363,8 @@
 
         Instruction *NewOp =
           new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask),
-                                getReplacementName(I, true, o));
-        NewOp->insertBefore(J);
+                                getReplacementName(IBeforeJ ? I : J, true, o));
+        NewOp->insertBefore(IBeforeJ ? J : I);
         return NewOp;
       }
     }
@@ -1872,17 +2372,17 @@
     Type *ArgType = ArgTypeL;
     if (numElemL < numElemH) {
       if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
-                                         ArgTypeL, VArgType, 1)) {
+                                         ArgTypeL, VArgType, IBeforeJ, 1)) {
         // This is another short-circuit case: we're combining a scalar into
         // a vector that is formed by an IE chain. We've just expanded the IE
         // chain, now insert the scalar and we're done.
 
         Instruction *S = InsertElementInst::Create(HOp, LOp, CV0,
-                                               getReplacementName(I, true, o));
-        S->insertBefore(J);
+                           getReplacementName(IBeforeJ ? I : J, true, o));
+        S->insertBefore(IBeforeJ ? J : I);
         return S;
       } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
-                                ArgTypeH)) {
+                                ArgTypeH, IBeforeJ)) {
         // The two vector inputs to the shuffle must be the same length,
         // so extend the smaller vector to be the same length as the larger one.
         Instruction *NLOp;
@@ -1897,29 +2397,32 @@
     
           NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
                                        ConstantVector::get(Mask),
-                                       getReplacementName(I, true, o, 1));
+                                       getReplacementName(IBeforeJ ? I : J,
+                                                          true, o, 1));
         } else {
           NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0,
-                                           getReplacementName(I, true, o, 1));
+                                           getReplacementName(IBeforeJ ? I : J,
+                                                              true, o, 1));
         }
   
-        NLOp->insertBefore(J);
+        NLOp->insertBefore(IBeforeJ ? J : I);
         LOp = NLOp;
       }
 
       ArgType = ArgTypeH;
     } else if (numElemL > numElemH) {
       if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
-                                         ArgTypeH, VArgType)) {
+                                         ArgTypeH, VArgType, IBeforeJ)) {
         Instruction *S =
           InsertElementInst::Create(LOp, HOp, 
                                     ConstantInt::get(Type::getInt32Ty(Context),
                                                      numElemL),
-                                    getReplacementName(I, true, o));
-        S->insertBefore(J);
+                                    getReplacementName(IBeforeJ ? I : J,
+                                                       true, o));
+        S->insertBefore(IBeforeJ ? J : I);
         return S;
       } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
-                                ArgTypeL)) {
+                                ArgTypeL, IBeforeJ)) {
         Instruction *NHOp;
         if (numElemH > 1) {
           std::vector<Constant *> Mask(numElemL);
@@ -1931,13 +2434,15 @@
     
           NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
                                        ConstantVector::get(Mask),
-                                       getReplacementName(I, true, o, 1));
+                                       getReplacementName(IBeforeJ ? I : J,
+                                                          true, o, 1));
         } else {
           NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0,
-                                           getReplacementName(I, true, o, 1));
+                                           getReplacementName(IBeforeJ ? I : J,
+                                                              true, o, 1));
         }
   
-        NHOp->insertBefore(J);
+        NHOp->insertBefore(IBeforeJ ? J : I);
         HOp = NHOp;
       }
     }
@@ -1955,19 +2460,21 @@
       }
 
       Instruction *BV = new ShuffleVectorInst(LOp, HOp,
-                                              ConstantVector::get(Mask),
-                                              getReplacementName(I, true, o));
-      BV->insertBefore(J);
+                          ConstantVector::get(Mask),
+                          getReplacementName(IBeforeJ ? I : J, true, o));
+      BV->insertBefore(IBeforeJ ? J : I);
       return BV;
     }
 
     Instruction *BV1 = InsertElementInst::Create(
                                           UndefValue::get(VArgType), LOp, CV0,
-                                          getReplacementName(I, true, o, 1));
-    BV1->insertBefore(I);
+                                          getReplacementName(IBeforeJ ? I : J,
+                                                             true, o, 1));
+    BV1->insertBefore(IBeforeJ ? J : I);
     Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1,
-                                          getReplacementName(I, true, o, 2));
-    BV2->insertBefore(J);
+                                          getReplacementName(IBeforeJ ? I : J,
+                                                             true, o, 2));
+    BV2->insertBefore(IBeforeJ ? J : I);
     return BV2;
   }
 
@@ -1976,7 +2483,7 @@
   void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
                      Instruction *I, Instruction *J,
                      SmallVector<Value *, 3> &ReplacedOperands,
-                     bool FlipMemInputs) {
+                     bool IBeforeJ) {
     unsigned NumOperands = I->getNumOperands();
 
     for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
@@ -1985,8 +2492,7 @@
 
       if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
         // This is the pointer for a load/store instruction.
-        ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o,
-                                FlipMemInputs);
+        ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
         continue;
       } else if (isa<CallInst>(I)) {
         Function *F = cast<CallInst>(I)->getCalledFunction();
@@ -2014,8 +2520,7 @@
         continue;
       }
 
-      ReplacedOperands[o] =
-        getReplacementInput(Context, I, J, o, FlipMemInputs);
+      ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
     }
   }
 
@@ -2026,8 +2531,7 @@
   void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
                      Instruction *J, Instruction *K,
                      Instruction *&InsertionPt,
-                     Instruction *&K1, Instruction *&K2,
-                     bool FlipMemInputs) {
+                     Instruction *&K1, Instruction *&K2) {
     if (isa<StoreInst>(I)) {
       AA->replaceWithNewValue(I, K);
       AA->replaceWithNewValue(J, K);
@@ -2057,13 +2561,11 @@
         }
 
         K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
-                                   ConstantVector::get(
-                                     FlipMemInputs ? Mask2 : Mask1),
+                                   ConstantVector::get( Mask1),
                                    getReplacementName(K, false, 1));
       } else {
         Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
-        Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
-        K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0,
+        K1 = ExtractElementInst::Create(K, CV0,
                                           getReplacementName(K, false, 1));
       }
 
@@ -2075,13 +2577,11 @@
         }
 
         K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
-                                   ConstantVector::get(
-                                     FlipMemInputs ? Mask1 : Mask2),
+                                   ConstantVector::get( Mask2),
                                    getReplacementName(K, false, 2));
       } else {
-        Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
         Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
-        K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1,
+        K2 = ExtractElementInst::Create(K, CV1,
                                           getReplacementName(K, false, 2));
       }
 
@@ -2181,36 +2681,6 @@
     }
   }
 
-  // As with the aliasing information, SCEV can also change because of
-  // vectorization. This information is used to compute relative pointer
-  // offsets; the necessary information will be cached here prior to
-  // fusion.
-  void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts,
-                                   DenseMap<Value *, Value *> &ChosenPairs,
-                                   DenseSet<Value *> &LowPtrInsts) {
-    for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
-      PIE = PairableInsts.end(); PI != PIE; ++PI) {
-      DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
-      if (P == ChosenPairs.end()) continue;
-
-      Instruction *I = cast<Instruction>(P->first);
-      Instruction *J = cast<Instruction>(P->second);
-
-      if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
-        continue;
-
-      Value *IPtr, *JPtr;
-      unsigned IAlignment, JAlignment;
-      int64_t OffsetInElmts;
-      if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
-                          OffsetInElmts) || abs64(OffsetInElmts) != 1)
-        llvm_unreachable("Pre-fusion pointer analysis failed");
-
-      Value *LowPI = (OffsetInElmts > 0) ? I : J;
-      LowPtrInsts.insert(LowPI);
-    }
-  }
-
   // When the first instruction in each pair is cloned, it will inherit its
   // parent's metadata. This metadata must be combined with that of the other
   // instruction in a safe way.
@@ -2244,27 +2714,27 @@
   // second member).
   void BBVectorize::fuseChosenPairs(BasicBlock &BB,
                      std::vector<Value *> &PairableInsts,
-                     DenseMap<Value *, Value *> &ChosenPairs) {
+                     DenseMap<Value *, Value *> &ChosenPairs,
+                     DenseSet<ValuePair> &FixedOrderPairs,
+                     DenseMap<VPPair, unsigned> &PairConnectionTypes,
+                     std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+                     std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) {
     LLVMContext& Context = BB.getContext();
 
     // During the vectorization process, the order of the pairs to be fused
     // could be flipped. So we'll add each pair, flipped, into the ChosenPairs
     // list. After a pair is fused, the flipped pair is removed from the list.
-    std::vector<ValuePair> FlippedPairs;
-    FlippedPairs.reserve(ChosenPairs.size());
+    DenseSet<ValuePair> FlippedPairs;
     for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
          E = ChosenPairs.end(); P != E; ++P)
-      FlippedPairs.push_back(ValuePair(P->second, P->first));
-    for (std::vector<ValuePair>::iterator P = FlippedPairs.begin(),
+      FlippedPairs.insert(ValuePair(P->second, P->first));
+    for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(),
          E = FlippedPairs.end(); P != E; ++P)
       ChosenPairs.insert(*P);
 
     std::multimap<Value *, Value *> LoadMoveSet;
     collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
 
-    DenseSet<Value *> LowPtrInsts;
-    collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts);
-
     DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
 
     for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
@@ -2304,44 +2774,91 @@
         continue;
       }
 
-      bool FlipMemInputs = false;
-      if (isa<LoadInst>(I) || isa<StoreInst>(I))
-        FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end());
+      // If the pair must have the other order, then flip it.
+      bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I));
+      if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) {
+        // This pair does not have a fixed order, and so we might want to
+        // flip it if that will yield fewer shuffles. We count the number
+        // of dependencies connected via swaps, and those directly connected,
+        // and flip the order if the number of swaps is greater.
+        bool OrigOrder = true;
+        VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J));
+        if (IP.first == ConnectedPairDeps.end()) {
+          IP = ConnectedPairDeps.equal_range(ValuePair(J, I));
+          OrigOrder = false;
+        }
+
+        if (IP.first != ConnectedPairDeps.end()) {
+          unsigned NumDepsDirect = 0, NumDepsSwap = 0;
+          for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+               Q != IP.second; ++Q) {
+            DenseMap<VPPair, unsigned>::iterator R =
+              PairConnectionTypes.find(VPPair(Q->second, Q->first));
+            assert(R != PairConnectionTypes.end() &&
+                   "Cannot find pair connection type");
+            if (R->second == PairConnectionDirect)
+              ++NumDepsDirect;
+            else if (R->second == PairConnectionSwap)
+              ++NumDepsSwap;
+          }
+
+          if (!OrigOrder)
+            std::swap(NumDepsDirect, NumDepsSwap);
 
+          if (NumDepsSwap > NumDepsDirect) {
+            FlipPairOrder = true;
+            DEBUG(dbgs() << "BBV: reordering pair: " << *I <<
+                            " <-> " << *J << "\n");
+          }
+        }
+      }
+
+      Instruction *L = I, *H = J;
+      if (FlipPairOrder)
+        std::swap(H, L);
+
+      // If the pair being fused uses the opposite order from that in the pair
+      // connection map, then we need to flip the types.
+      VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L));
+      for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
+           Q != IP.second; ++Q) {
+        DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q);
+        assert(R != PairConnectionTypes.end() &&
+               "Cannot find pair connection type");
+        if (R->second == PairConnectionDirect)
+          R->second = PairConnectionSwap;
+        else if (R->second == PairConnectionSwap)
+          R->second = PairConnectionDirect;
+      }
+
+      bool LBeforeH = !FlipPairOrder;
       unsigned NumOperands = I->getNumOperands();
       SmallVector<Value *, 3> ReplacedOperands(NumOperands);
-      getReplacementInputsForPair(Context, I, J, ReplacedOperands,
-        FlipMemInputs);
+      getReplacementInputsForPair(Context, L, H, ReplacedOperands,
+                                  LBeforeH);
 
       // Make a copy of the original operation, change its type to the vector
       // type and replace its operands with the vector operands.
-      Instruction *K = I->clone();
-      if (I->hasName()) K->takeName(I);
+      Instruction *K = L->clone();
+      if (L->hasName())
+        K->takeName(L);
+      else if (H->hasName())
+        K->takeName(H);
 
       if (!isa<StoreInst>(K))
-        K->mutateType(getVecTypeForPair(I->getType(), J->getType()));
+        K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
 
-      combineMetadata(K, J);
+      combineMetadata(K, H);
 
       for (unsigned o = 0; o < NumOperands; ++o)
         K->setOperand(o, ReplacedOperands[o]);
 
-      // If we've flipped the memory inputs, make sure that we take the correct
-      // alignment.
-      if (FlipMemInputs) {
-        if (isa<StoreInst>(K))
-          cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment());
-        else
-          cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment());
-      }
-
       K->insertAfter(J);
 
       // Instruction insertion point:
       Instruction *InsertionPt = K;
       Instruction *K1 = 0, *K2 = 0;
-      replaceOutputsOfPair(Context, I, J, K, InsertionPt, K1, K2,
-        FlipMemInputs);
+      replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
 
       // The use tree of the first original instruction must be moved to after
       // the location of the second instruction. The entire use tree of the
@@ -2351,10 +2868,10 @@
       moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J);
 
       if (!isa<StoreInst>(I)) {
-        I->replaceAllUsesWith(K1);
-        J->replaceAllUsesWith(K2);
-        AA->replaceWithNewValue(I, K1);
-        AA->replaceWithNewValue(J, K2);
+        L->replaceAllUsesWith(K1);
+        H->replaceAllUsesWith(K2);
+        AA->replaceWithNewValue(L, K1);
+        AA->replaceWithNewValue(H, K2);
       }
 
       // Instructions that may read from memory may be in the load move set.
@@ -2387,6 +2904,9 @@
       SE->forgetValue(J);
       I->eraseFromParent();
       J->eraseFromParent();
+
+      DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" <<
+                                               BB << "\n");
     }
 
     DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
@@ -2397,6 +2917,7 @@
 static const char bb_vectorize_name[] = "Basic-Block Vectorization";
 INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
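For readers skimming the BBVectorize changes above: when a fused pair has no fixed order, the new code counts how many of its incoming connections are direct and how many arrive through a swapped order, and flips the pair when swaps dominate. Below is a minimal standalone C++ sketch of that decision, using hypothetical types rather than the ConnectedPairDeps/PairConnectionTypes structures the pass actually queries.

#include <utility>
#include <vector>

enum ConnectionType { Direct, Swap };

// Decide whether to flip a candidate pair: count how many incoming
// dependencies connect directly versus through a swapped order, and flip
// when the swapped connections dominate (that ordering saves shuffles).
static bool shouldFlipPair(const std::vector<ConnectionType> &Deps,
                           bool OrigOrder) {
  unsigned NumDepsDirect = 0, NumDepsSwap = 0;
  for (ConnectionType T : Deps)
    (T == Direct ? NumDepsDirect : NumDepsSwap) += 1;
  // If the dependencies were looked up with the opposite key order, the
  // meaning of "direct" and "swap" is reversed.
  if (!OrigOrder)
    std::swap(NumDepsDirect, NumDepsSwap);
  return NumDepsSwap > NumDepsDirect;
}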
 

Modified: llvm/branches/R600/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Nov 13 09:21:47 2012
@@ -18,10 +18,13 @@
 //
 // This pass has three parts:
 // 1. The main loop pass that drives the different parts.
-// 2. LoopVectorizationLegality - A helper class that checks for the legality
+// 2. LoopVectorizationLegality - A unit that checks for the legality
 //    of the vectorization.
-// 3. SingleBlockLoopVectorizer - A helper class that performs the actual
+// 3. SingleBlockLoopVectorizer - A unit that performs the actual
 //    widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+//    of vectorization. It decides on the optimal vector width, which
+//    can be one, if vectorization is not profitable.
 //===----------------------------------------------------------------------===//
 //
 // The reduction-variable vectorization is based on the paper:
@@ -51,13 +54,15 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/TargetTransformInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -67,13 +72,17 @@
 using namespace llvm;
 
 static cl::opt<unsigned>
-DefaultVectorizationFactor("default-loop-vectorize-width",
-                          cl::init(4), cl::Hidden,
-                          cl::desc("Set the default loop vectorization width"));
+VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
+          cl::desc("Set the default vectorization width. Zero is autoselect."));
+
+/// We don't vectorize loops with a known constant trip count below this number.
+const unsigned TinyTripCountThreshold = 16;
+
 namespace {
 
-// Forward declaration.
+// Forward declarations.
 class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
 
 /// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
 /// block to a specified vectorization factor (VF).
@@ -93,8 +102,9 @@
 public:
   /// Ctor.
   SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
-                            LPPassManager *Lpm, unsigned VecWidth):
-  OrigLoop(Orig), SE(Se), LI(Li), LPM(Lpm), VF(VecWidth),
+                            DominatorTree *dt, LPPassManager *Lpm,
+                            unsigned VecWidth):
+  OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth),
   Builder(Se->getContext()), Induction(0), OldInduction(0) { }
 
   // Perform the actual loop widening (vectorization).
@@ -103,9 +113,9 @@
     createEmptyLoop(Legal);
     /// Widen each instruction in the old loop to a new one in the new loop.
     /// Use the Legality module to find the induction and reduction variables.
-   vectorizeLoop(Legal);
+    vectorizeLoop(Legal);
     // register the new loop.
-    cleanup();
+    updateAnalysis();
  }
 
 private:
@@ -114,7 +124,7 @@
   /// Copy and widen the instructions from the old loop.
   void vectorizeLoop(LoopVectorizationLegality *Legal);
   /// Insert the new loop to the loop hierarchy and pass manager.
-  void cleanup();
+  void updateAnalysis();
 
   /// This instruction is un-vectorizable. Implement it as a sequence
   /// of scalars.
@@ -150,6 +160,8 @@
   ScalarEvolution *SE;
   // Loop Info.
   LoopInfo *LI;
+  // Dominator Tree.
+  DominatorTree *DT;
   // Loop Pass Manager;
   LPPassManager *LPM;
   // The vectorization factor to use.
@@ -160,6 +172,10 @@
 
   // --- Vectorization state ---
 
+  /// The vector-loop preheader.
+  BasicBlock *LoopVectorPreHeader;
+  /// The scalar-loop preheader.
+  BasicBlock *LoopScalarPreHeader;
   /// Middle Block between the vector and the scalar.
   BasicBlock *LoopMiddleBlock;
   ///The ExitBlock of the scalar loop.
@@ -198,12 +214,13 @@
   TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { }
 
   /// This represents the kinds of reductions that we support.
-  /// We use the enum values to hold the 'identity' value for
-  /// each operand. This value does not change the result if applied.
   enum ReductionKind {
-    NoReduction = -1, /// Not a reduction.
-    IntegerAdd  = 0,  /// Sum of numbers.
-    IntegerMult = 1  /// Product of numbers.
+    NoReduction, /// Not a reduction.
+    IntegerAdd,  /// Sum of numbers.
+    IntegerMult, /// Product of numbers.
+    IntegerOr,   /// Bitwise or logical OR of numbers.
+    IntegerAnd,  /// Bitwise or logical AND of numbers.
+    IntegerXor   /// Bitwise or logical XOR of numbers.
   };
 
   /// This POD struct holds information about reduction variables.
@@ -229,11 +246,10 @@
   /// of the reductions that were found in the loop.
   typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
 
-  /// Returns the maximum vectorization factor that we *can* use to vectorize
-  /// this loop. This does not mean that it is profitable to vectorize this
-  /// loop, only that it is legal to do so. This may be a large number. We
-  /// can vectorize to any SIMD width below this number.
-  unsigned getLoopMaxVF();
+  /// Returns true if it is legal to vectorize this loop.
+  /// This does not mean that it is profitable to vectorize this
+  /// loop, only that it is legal to do so.
+  bool canVectorize();
 
   /// Returns the Induction variable.
   PHINode *getInduction() {return Induction;}
@@ -247,6 +263,9 @@
   /// This check allows us to vectorize A[idx] into a wide load/store.
   bool isConsecutiveGep(Value *Ptr);
 
+  /// Returns true if this instruction will remain scalar after vectorization.
+  bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
 private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
@@ -259,10 +278,6 @@
   /// Returns true if BB is vectorizable
   bool canVectorizeMemory(BasicBlock &BB);
 
-  // Check if a pointer value is known to be disjoint.
-  // Example: Alloca, Global, NoAlias.
-  bool isIdentifiedSafeObject(Value* Val);
-
   /// Returns True, if 'Phi' is the kind of reduction variable for type
   /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
   bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
@@ -288,6 +303,56 @@
   /// Allowed outside users. This holds the reduction
   /// vars which can be accessed from outside the loop.
   SmallPtrSet<Value*, 4> AllowedExit;
+  /// This set holds the variables which are known to be uniform after
+  /// vectorization.
+  SmallPtrSet<Instruction*, 4> Uniforms;
+};
+
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen for a
+/// number of reasons. In this class we mainly attempt to predict
+/// the expected speedup/slowdowns due to the supported instruction set.
+/// We use the VectorTargetTransformInfo to query the different backends
+/// for the cost of different operations.
+class LoopVectorizationCostModel {
+public:
+  /// C'tor.
+  LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
+                             LoopVectorizationLegality *Leg,
+                             const VectorTargetTransformInfo *Vtti):
+  TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
+
+  /// Returns the most profitable vectorization factor for the loop that is
+  /// smaller or equal to the VF argument. This method checks every power
+  /// of two up to VF.
+  unsigned findBestVectorizationFactor(unsigned VF = 8);
+
+private:
+  /// Returns the expected execution cost. The unit of the cost does
+  /// not matter because we use the 'cost' units to compare different
+  /// vector widths. The cost that is returned is *not* normalized by
+  /// the factor width.
+  unsigned expectedCost(unsigned VF);
+
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+  /// A helper function for converting Scalar types to vector types.
+  /// If the incoming type is void, we return void. If the VF is 1, we return
+  /// the scalar type.
+  static Type* ToVectorTy(Type *Scalar, unsigned VF);
+
+  /// The loop that we evaluate.
+  Loop *TheLoop;
+  /// Scev analysis.
+  ScalarEvolution *SE;
+
+  /// Vectorization legality.
+  LoopVectorizationLegality *Legal;
+  /// Vector target information.
+  const VectorTargetTransformInfo *VTTI;
 };
 
 struct LoopVectorize : public LoopPass {
@@ -300,6 +365,8 @@
   ScalarEvolution *SE;
   DataLayout *DL;
   LoopInfo *LI;
+  TargetTransformInfo *TTI;
+  DominatorTree *DT;
 
   virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
     // We only vectorize innermost loops.
@@ -309,25 +376,45 @@
     SE = &getAnalysis<ScalarEvolution>();
     DL = getAnalysisIfAvailable<DataLayout>();
     LI = &getAnalysis<LoopInfo>();
+    TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+    DT = &getAnalysis<DominatorTree>();
 
     DEBUG(dbgs() << "LV: Checking a loop in \"" <<
           L->getHeader()->getParent()->getName() << "\"\n");
 
     // Check if it is legal to vectorize the loop.
     LoopVectorizationLegality LVL(L, SE, DL);
-    unsigned MaxVF = LVL.getLoopMaxVF();
-
-    // Check that we can vectorize this loop using the chosen vectorization
-    // width.
-    if (MaxVF < DefaultVectorizationFactor) {
-      DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n");
+    if (!LVL.canVectorize()) {
+      DEBUG(dbgs() << "LV: Not vectorizing.\n");
       return false;
     }
 
-    DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n");
+    // Select the preferred vectorization factor.
+    unsigned VF = 1;
+    if (VectorizationFactor == 0) {
+      const VectorTargetTransformInfo *VTTI = 0;
+      if (TTI)
+        VTTI = TTI->getVectorTargetTransformInfo();
+      // Use the cost model.
+      LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+      VF = CM.findBestVectorizationFactor();
+
+      if (VF == 1) {
+        DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+        return false;
+      }
+
+    } else {
+      // Use the user command flag.
+      VF = VectorizationFactor;
+    }
+
+    DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
+          L->getHeader()->getParent()->getParent()->getModuleIdentifier()<<
+          "\n");
 
     // If we decided that it is *legal* to vectorize the loop then do it.
-    SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor);
+    SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF);
     LB.vectorize(&LVL);
 
     DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -340,6 +427,9 @@
     AU.addRequiredID(LCSSAID);
     AU.addRequired<LoopInfo>();
     AU.addRequired<ScalarEvolution>();
+    AU.addRequired<DominatorTree>();
+    AU.addPreserved<LoopInfo>();
+    AU.addPreserved<DominatorTree>();
   }
 
 };
@@ -389,7 +479,7 @@
 }
 
 bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
-  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+  GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
   if (!Gep)
     return false;
 
@@ -434,7 +524,7 @@
   SmallVector<Constant*, 8> Indices;
   // Create a vector of consecutive numbers from zero to VF.
   for (unsigned i = 0; i < VF; ++i)
-    Indices.push_back(ConstantInt::get(ScalarTy, Val));
+    Indices.push_back(ConstantInt::get(ScalarTy, Val, true));
 
   // Add the consecutive indices to the vector value.
   return ConstantVector::get(Indices);
@@ -510,7 +600,8 @@
     WidenMap[Instr] = VecResults;
 }
 
-void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
+void
+SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   /*
    In this function we generate a new loop. The new loop will contain
    the vectorized instructions while the old loop will continue to run the
@@ -545,6 +636,10 @@
   BasicBlock *ExitBlock = OrigLoop->getExitBlock();
   assert(ExitBlock && "Must have an exit block");
 
+  // The loop index does not have to start at Zero. It starts with this value.
+  OldInduction = Legal->getInduction();
+  Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
+
   assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
   assert(BypassBlock && "Invalid loop structure");
 
@@ -560,7 +655,6 @@
                                  "scalar.preheader");
   // Find the induction variable.
   BasicBlock *OldBasicBlock = OrigLoop->getHeader();
-  OldInduction = Legal->getInduction();
   assert(OldInduction && "We must have a single phi node.");
   Type *IdxTy = OldInduction->getType();
 
@@ -570,7 +664,6 @@
 
   // Generate the induction variable.
   Induction = Builder.CreatePHI(IdxTy, 2, "index");
-  Constant *Zero = ConstantInt::get(IdxTy, 0);
   Constant *Step = ConstantInt::get(IdxTy, VF);
 
   // Find the loop boundaries.
@@ -594,15 +687,22 @@
 
   // Count holds the overall loop count (N).
   Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
+
+  // Add the start index to the loop count to get the new end index.
+  Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
+
   // Now we need to generate the expression for N - (N % VF), which is
   // the part that the vectorized body will execute.
   Constant *CIVF = ConstantInt::get(IdxTy, VF);
   Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc);
   Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc);
+  Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
+                                                     "end.idx.rnd.down", Loc);
 
   // Now, compare the new count to zero. If it is zero, jump to the scalar part.
   Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
-                               CountRoundDown, ConstantInt::getNullValue(IdxTy),
+                               IdxEndRoundDown,
+                               StartIdx,
                                "cmp.zero", Loc);
   BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
   // Remove the old terminator.
@@ -611,8 +711,8 @@
   // Add a check in the middle block to see if we have completed
   // all of the iterations in the first vector loop.
   // If (N - N%VF) == N, then we *don't* need to run the remainder.
-  Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
-                                CountRoundDown, "cmp.n",
+  Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
+                                IdxEndRoundDown, "cmp.n",
                                 MiddleBlock->getTerminator());
 
   BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
@@ -621,10 +721,10 @@
 
   // Create i+1 and fill the PHINode.
   Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
-  Induction->addIncoming(Zero, VectorPH);
+  Induction->addIncoming(StartIdx, VectorPH);
   Induction->addIncoming(NextIdx, VecBody);
   // Create the compare.
-  Value *ICmp = Builder.CreateICmpEQ(NextIdx, CountRoundDown);
+  Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown);
   Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
 
   // Now we have two terminators. Remove the old one from the block.
@@ -632,7 +732,7 @@
 
   // Fix the scalar body iteration count.
   unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
-  OldInduction->setIncomingValue(BlockIdx, CountRoundDown);
+  OldInduction->setIncomingValue(BlockIdx, IdxEndRoundDown);
 
   // Get ready to start creating new instructions into the vectorized body.
   Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
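A small worked example of the index arithmetic introduced above (plain C++, not LLVM IR; the names mirror end.idx and end.idx.rnd.down): starting from a non-zero induction start value, the vector loop runs up to the rounded-down end index and the scalar remainder loop finishes the rest.

#include <cassert>
#include <cstdint>

// StartIdx is the original induction start value, Count the trip count,
// VF the vector width; the fields mirror the IR values created above.
struct LoopBounds { uint64_t IdxEnd, IdxEndRoundDown; };

static LoopBounds computeBounds(uint64_t StartIdx, uint64_t Count,
                                uint64_t VF) {
  uint64_t R = Count % VF;              // n.mod.vf
  uint64_t CountRoundDown = Count - R;  // n.vec
  return { StartIdx + Count,            // end.idx
           StartIdx + CountRoundDown }; // end.idx.rnd.down
}

int main() {
  // A loop starting at i = 3 with 10 iterations and VF = 4:
  LoopBounds B = computeBounds(3, 10, 4);
  assert(B.IdxEndRoundDown == 11); // vector body handles i = 3..10
  assert(B.IdxEnd == 13);          // scalar remainder handles i = 11..12
  return 0;
}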
@@ -651,6 +751,8 @@
   }
 
   // Save the state.
+  LoopVectorPreHeader = VectorPH;
+  LoopScalarPreHeader = ScalarPH;
   LoopMiddleBlock = MiddleBlock;
   LoopExitBlock = ExitBlock;
   LoopVectorBody = VecBody;
@@ -658,8 +760,36 @@
   LoopBypassBlock = BypassBlock;
 }
 
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+static unsigned
+getReductionIdentity(LoopVectorizationLegality::ReductionKind K) {
+  switch (K) {
+  case LoopVectorizationLegality::IntegerXor:
+  case LoopVectorizationLegality::IntegerAdd:
+  case LoopVectorizationLegality::IntegerOr:
+    // Adding, Xoring, Oring zero to a number does not change it.
+    return 0;
+  case LoopVectorizationLegality::IntegerMult:
+    // Multiplying a number by 1 does not change it.
+    return 1;
+  case LoopVectorizationLegality::IntegerAnd:
+    // AND-ing a number with an all-1 value does not change it.
+    return -1;
+  default:
+    llvm_unreachable("Unknown reduction kind");
+  }
+}
+
 void
 SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+  //===------------------------------------------------===//
+  //
+  // Notice: any optimization or new instruction that goes
+  // into the code below should also be implemented in
+  // the cost-model.
+  //
+  //===------------------------------------------------===//
   typedef SmallVector<PHINode*, 4> PhiVector;
   BasicBlock &BB = *OrigLoop->getHeader();
   Constant *Zero = ConstantInt::get(
@@ -719,8 +849,19 @@
         BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
         Value *A = getVectorValue(Inst->getOperand(0));
         Value *B = getVectorValue(Inst->getOperand(1));
+
         // Use this vector value for all users of the original instruction.
-        WidenMap[Inst] = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+        Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+        WidenMap[Inst] = V;
+
+        // Update the NSW, NUW and Exact flags.
+        BinaryOperator *VecOp = cast<BinaryOperator>(V);
+        if (isa<OverflowingBinaryOperator>(BinOp)) {
+          VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
+          VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
+        }
+        if (isa<PossiblyExactOperator>(VecOp))
+          VecOp->setIsExact(BinOp->isExact());
         break;
       }
       case Instruction::Select: {
@@ -774,8 +915,8 @@
         // The last index does not have to be the induction. It can be
         // consecutive and be a function of the index. For example A[I+1];
         unsigned NumOperands = Gep->getNumOperands();
-        Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
-        LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0));
+        Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+        LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
 
         // Create the new GEP with the new induction variable.
         GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -804,7 +945,7 @@
         // consecutive and be a function of the index. For example A[I+1];
         unsigned NumOperands = Gep->getNumOperands();
         Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
-        LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0));
+        LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
 
         // Create the new GEP with the new induction variable.
         GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -875,10 +1016,9 @@
     Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
     Type *VecTy = VectorExit->getType();
 
-    // Find the reduction identity variable. The value of the enum is the
-    // identity. Zero for addition. One for Multiplication.
-    unsigned IdentitySclr =  RdxDesc.Kind;
-    Constant *Identity = getUniformVector(IdentitySclr,
+    // Find the reduction identity variable. Zero for addition, or, xor,
+    // one for multiplication, -1 for And.
+    Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind),
                                           VecTy->getScalarType());
 
     // This vector is the Identity vector where the first element is the
@@ -914,14 +1054,28 @@
     // Extract the first scalar.
     Value *Scalar0 =
       Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
-    // Extract and sum the remaining vector elements.
+    // Extract and reduce the remaining vector elements.
     for (unsigned i=1; i < VF; ++i) {
       Value *Scalar1 =
         Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
-      if (RdxDesc.Kind == LoopVectorizationLegality::IntegerAdd) {
-        Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
-      } else {
-        Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+      switch (RdxDesc.Kind) {
+        case LoopVectorizationLegality::IntegerAdd:
+          Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerMult:
+          Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerOr:
+          Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerAnd:
+          Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
+          break;
+        case LoopVectorizationLegality::IntegerXor:
+          Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
+          break;
+        default:
+          llvm_unreachable("Unknown reduction operation");
       }
     }
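To illustrate the reduction handling above in isolation: the accumulator vector is seeded with the identity from getReductionIdentity, and after the loop its lanes are folded with the matching operation. A minimal sketch with plain integers (not LLVM values), assuming the same five integer reduction kinds:

#include <cassert>
#include <cstdint>
#include <vector>

enum ReductionKind { IntegerAdd, IntegerMult, IntegerOr, IntegerAnd, IntegerXor };

// Identity element: adding/or-ing/xor-ing 0, multiplying by 1, and and-ing
// with an all-ones value leave a number unchanged.
static uint64_t identity(ReductionKind K) {
  switch (K) {
  case IntegerMult: return 1;
  case IntegerAnd:  return ~0ULL;
  default:          return 0;   // Add, Or, Xor
  }
}

// Fold the lanes of a "vector" accumulator into a single scalar, as the
// epilogue above does with extractelement plus the reduction operation.
static uint64_t reduceLanes(const std::vector<uint64_t> &Lanes,
                            ReductionKind K) {
  uint64_t Scalar = Lanes[0];
  for (size_t i = 1; i < Lanes.size(); ++i) {
    switch (K) {
    case IntegerAdd:  Scalar += Lanes[i]; break;
    case IntegerMult: Scalar *= Lanes[i]; break;
    case IntegerOr:   Scalar |= Lanes[i]; break;
    case IntegerAnd:  Scalar &= Lanes[i]; break;
    case IntegerXor:  Scalar ^= Lanes[i]; break;
    }
  }
  return Scalar;
}

int main() {
  std::vector<uint64_t> Acc(4, identity(IntegerAdd)); // <0, 0, 0, 0>
  Acc = {1, 2, 3, 4};                                 // partial sums after the loop
  assert(reduceLanes(Acc, IntegerAdd) == 10);
  return 0;
}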
 
@@ -956,23 +1110,36 @@
   }// end of for each redux variable.
 }
 
-void SingleBlockLoopVectorizer::cleanup() {
+void SingleBlockLoopVectorizer::updateAnalysis() {
   // The original basic block.
   SE->forgetLoop(OrigLoop);
+
+  // Update the dominator tree information.
+  assert(DT->properlyDominates(LoopBypassBlock, LoopExitBlock) &&
+         "Entry does not dominate exit.");
+
+  DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlock);
+  DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
+  DT->addNewBlock(LoopMiddleBlock, LoopBypassBlock);
+  DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
+  DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
+  DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+
+  DEBUG(DT->verifyAnalysis());
 }
 
-unsigned LoopVectorizationLegality::getLoopMaxVF() {
+bool LoopVectorizationLegality::canVectorize() {
   if (!TheLoop->getLoopPreheader()) {
     assert(false && "No preheader!!");
     DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
-    return  1;
+    return  false;
   }
 
   // We can only vectorize single basic block loops.
   unsigned NumBlocks = TheLoop->getNumBlocks();
   if (NumBlocks != 1) {
     DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
-    return 1;
+    return false;
   }
 
   // We need to have a loop header.
@@ -982,22 +1149,30 @@
   // Go over each instruction and look at memory deps.
   if (!canVectorizeBlock(*BB)) {
     DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
-    return 1;
+    return false;
   }
 
   // ScalarEvolution needs to be able to find the exit count.
   const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
   if (ExitCount == SE->getCouldNotCompute()) {
     DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
-    return 1;
+    return false;
+  }
+
+  // Do not loop-vectorize loops with a tiny trip count.
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB);
+  if (TC > 0u && TC < TinyTripCountThreshold) {
+    DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
+          "This loop is not worth vectorizing.\n");
+    return false;
   }
 
   DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
 
   // Okay! We can vectorize. At this point we don't have any other mem analysis
-  // which may limit our maximum vectorization factor, so just return the
-  // maximum SIMD size.
-  return DefaultVectorizationFactor;
+  // which may limit our maximum vectorization factor, so just return true with
+  // no restrictions.
+  return true;
 }
 
 bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
@@ -1032,7 +1207,19 @@
         continue;
       }
       if (AddReductionVar(Phi, IntegerMult)) {
-        DEBUG(dbgs() << "LV: Found an Mult reduction PHI."<< *Phi <<"\n");
+        DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+        continue;
+      }
+      if (AddReductionVar(Phi, IntegerOr)) {
+        DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+        continue;
+      }
+      if (AddReductionVar(Phi, IntegerAnd)) {
+        DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+        continue;
+      }
+      if (AddReductionVar(Phi, IntegerXor)) {
+        DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
         continue;
       }
 
@@ -1043,8 +1230,7 @@
     // We still don't handle functions.
     CallInst *CI = dyn_cast<CallInst>(I);
     if (CI) {
-      DEBUG(dbgs() << "LV: Found a call site:"<<
-            CI->getCalledFunction()->getName() << "\n");
+      DEBUG(dbgs() << "LV: Found a call site.\n");
       return false;
     }
 
@@ -1076,9 +1262,40 @@
       return false;
   }
 
-  // If the memory dependencies do not prevent us from
-  // vectorizing, then vectorize.
-  return canVectorizeMemory(BB);
+  // Don't vectorize if the memory dependencies do not allow vectorization.
+  if (!canVectorizeMemory(BB))
+    return false;
+
+  // We now know that the loop is vectorizable!
+  // Collect variables that will remain uniform after vectorization.
+  std::vector<Value*> Worklist;
+
+  // Start with the conditional branch and walk up the block.
+  Worklist.push_back(BB.getTerminator()->getOperand(0));
+
+  while (Worklist.size()) {
+    Instruction *I = dyn_cast<Instruction>(Worklist.back());
+    Worklist.pop_back();
+    // Look at instructions inside this block.
+    if (!I) continue;
+    if (I->getParent() != &BB) continue;
+
+    // Stop when reaching PHI nodes.
+    if (isa<PHINode>(I)) {
+      assert(I == Induction && "Found a uniform PHI that is not the induction");
+      break;
+    }
+
+    // This is a known uniform.
+    Uniforms.insert(I);
+
+    // Insert all operands.
+    for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+      Worklist.push_back(I->getOperand(i));
+    }
+  }
+
+  return true;
 }
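The uniform-variable collection above walks backwards from the loop's exit condition; everything it reaches inside the header block (stopping at the induction PHI) stays scalar after vectorization. A rough standalone model of that walk, with a hypothetical Node type and an explicit visited set added for clarity:

#include <set>
#include <vector>

// Minimal stand-in for an instruction in the loop header block.
struct Node {
  bool InBlock;                  // defined inside the loop header?
  bool IsPHI;                    // the walk stops at PHI nodes
  std::vector<Node*> Operands;
};

// Collect everything feeding the loop's exit condition; those values are
// computed once per vector iteration and remain scalar ("uniform").
static void collectUniforms(Node *Cond, std::set<Node*> &Uniforms) {
  std::vector<Node*> Worklist{Cond};
  while (!Worklist.empty()) {
    Node *I = Worklist.back();
    Worklist.pop_back();
    if (!I || !I->InBlock || I->IsPHI)
      continue;                  // outside the block, or the induction PHI
    if (!Uniforms.insert(I).second)
      continue;                  // already visited
    for (Node *Op : I->Operands)
      Worklist.push_back(Op);
  }
}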
 
 bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
@@ -1166,6 +1383,13 @@
       Reads.push_back(Ptr);
   }
 
+  // If we write (or read-write) to a single destination and there are no
+  // other reads in this loop then it is safe to vectorize.
+  if (ReadWrites.size() == 1 && Reads.size() == 0) {
+    DEBUG(dbgs() << "LV: Found a write-only loop!\n");
+    return true;
+  }
+
   // Now that the pointers are in two lists (Reads and ReadWrites), we
   // can check that there are no conflicts between each of the writes and
   // between the writes to the reads.
@@ -1178,7 +1402,7 @@
     GetUnderlyingObjects(*I, TempObjects, DL);
     for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
          it != e; ++it) {
-      if (!isIdentifiedSafeObject(*it)) {
+      if (!isIdentifiedObject(*it)) {
         DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
         return false;
       }
@@ -1196,7 +1420,7 @@
     GetUnderlyingObjects(*I, TempObjects, DL);
     for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
          it != e; ++it) {
-      if (!isIdentifiedSafeObject(*it)) {
+      if (!isIdentifiedObject(*it)) {
         DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
         return false;
       }
@@ -1213,19 +1437,6 @@
   return true;
 }
 
-/// Checks if the value is a Global variable or if it is an Arguments
-/// marked with the NoAlias attribute.
-bool LoopVectorizationLegality::isIdentifiedSafeObject(Value* Val) {
-  assert(Val && "Invalid value");
-  if (isa<GlobalValue>(Val))
-    return true;
-  if (isa<AllocaInst>(Val))
-    return true;
-  if (Argument *A = dyn_cast<Argument>(Val))
-    return A->hasNoAliasAttr();
-  return false;
-}
-
 bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
                                                 ReductionKind Kind) {
   if (Phi->getNumIncomingValues() != 2)
@@ -1319,6 +1530,12 @@
     case Instruction::UDiv:
     case Instruction::SDiv:
       return Kind == IntegerMult;
+    case Instruction::And:
+      return Kind == IntegerAnd;
+    case Instruction::Or:
+      return Kind == IntegerOr;
+    case Instruction::Xor:
+      return Kind == IntegerXor;
     }
 }
 
@@ -1331,15 +1548,222 @@
     return false;
   }
   const SCEV *Step = AR->getStepRecurrence(*SE);
-  const SCEV *Start = AR->getStart();
 
-  if (!Step->isOne() || !Start->isZero()) {
-    DEBUG(dbgs() << "LV: PHI does not start at zero or steps by one.\n");
+  if (!Step->isOne()) {
+    DEBUG(dbgs() << "LV: PHI stride does not equal one.\n");
     return false;
   }
   return true;
 }
 
+unsigned
+LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+  if (!VTTI) {
+    DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
+    return 1;
+  }
+
+  float Cost = expectedCost(1);
+  unsigned Width = 1;
+  DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+  for (unsigned i=2; i <= VF; i*=2) {
+    // Notice that the vector loop needs to be executed fewer times, so
+    // we need to divide the cost of the vector loops by the width of
+    // the vector elements.
+    float VectorCost = expectedCost(i) / (float)i;
+    DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+          (int)VectorCost << ".\n");
+    if (VectorCost < Cost) {
+      Cost = VectorCost;
+      Width = i;
+    }
+  }
+
+  DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+  return Width;
+}
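A toy numeric illustration of the selection loop above: each candidate width's cost is divided by the width (the vector loop runs fewer iterations), and the smallest normalized cost wins. The costs below are made up for the example, not real target numbers:

#include <cstdio>

// Hypothetical per-iteration costs standing in for expectedCost(VF).
static float expectedCostDemo(unsigned VF) {
  switch (VF) {
  case 1:  return 10;
  case 2:  return 14;
  case 4:  return 18;
  default: return 40;
  }
}

int main() {
  float Cost = expectedCostDemo(1);   // scalar baseline: 10
  unsigned Width = 1;
  for (unsigned i = 2; i <= 8; i *= 2) {
    float VectorCost = expectedCostDemo(i) / (float)i; // 7.0, 4.5, 5.0
    if (VectorCost < Cost) {
      Cost = VectorCost;
      Width = i;
    }
  }
  std::printf("Selected VF = %u\n", Width); // picks 4 in this made-up example
  return 0;
}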
+
+unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
+  // We can only estimate the cost of single basic block loops.
+  assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
+
+  BasicBlock *BB = TheLoop->getHeader();
+  unsigned Cost = 0;
+
+  // For each instruction in the old loop.
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    Instruction *Inst = it;
+    unsigned C = getInstructionCost(Inst, VF);
+    Cost += C;
+    DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
+          " For instruction: "<< *Inst << "\n");
+  }
+
+  return Cost;
+}
+
+unsigned
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
+  assert(VTTI && "Invalid vector target transformation info");
+
+  // If we know that this instruction will remain uniform, check the cost of
+  // the scalar version.
+  if (Legal->isUniformAfterVectorization(I))
+    VF = 1;
+
+  Type *RetTy = I->getType();
+  Type *VectorTy = ToVectorTy(RetTy, VF);
+
+
+  // TODO: We need to estimate the cost of intrinsic calls.
+  switch (I->getOpcode()) {
+    case Instruction::GetElementPtr:
+      // We mark this instruction as zero-cost because scalar GEPs are usually
+      // lowered to the instruction addressing mode. At the moment we don't
+      // generate vector geps.
+      return 0;
+    case Instruction::Br: {
+      return VTTI->getCFInstrCost(I->getOpcode());
+    }
+    case Instruction::PHI:
+      return 0;
+    case Instruction::Add:
+    case Instruction::FAdd:
+    case Instruction::Sub:
+    case Instruction::FSub:
+    case Instruction::Mul:
+    case Instruction::FMul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor: {
+      return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
+    }
+    case Instruction::Select: {
+      SelectInst *SI = cast<SelectInst>(I);
+      const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+      bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+      Type *CondTy = SI->getCondition()->getType();
+      if (ScalarCond)
+        CondTy = VectorType::get(CondTy, VF);
+
+      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
+    }
+    case Instruction::ICmp:
+    case Instruction::FCmp: {
+      Type *ValTy = I->getOperand(0)->getType();
+      VectorTy = ToVectorTy(ValTy, VF);
+      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
+    }
+    case Instruction::Store: {
+      StoreInst *SI = cast<StoreInst>(I);
+      Type *ValTy = SI->getValueOperand()->getType();
+      VectorTy = ToVectorTy(ValTy, VF);
+
+      if (VF == 1)
+        return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+                              SI->getAlignment(), SI->getPointerAddressSpace());
+
+      // Scalarized stores.
+      if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
+        unsigned Cost = 0;
+        unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+                                              ValTy);
+        // The cost of extracting from the value vector.
+        Cost += VF * (ExtCost);
+        // The cost of the scalar stores.
+        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+                                           ValTy->getScalarType(),
+                                           SI->getAlignment(),
+                                           SI->getPointerAddressSpace());
+        return Cost;
+      }
+
+      // Wide stores.
+      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
+                                   SI->getPointerAddressSpace());
+    }
+    case Instruction::Load: {
+      LoadInst *LI = cast<LoadInst>(I);
+
+      if (VF == 1)
+        return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
+                                     LI->getAlignment(),
+                                     LI->getPointerAddressSpace());
+
+      // Scalarized loads.
+      if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
+        unsigned Cost = 0;
+        unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
+        // The cost of inserting the loaded value into the result vector.
+        Cost += VF * (InCost);
+        // The cost of the scalar loads.
+        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+                                           RetTy->getScalarType(),
+                                           LI->getAlignment(),
+                                           LI->getPointerAddressSpace());
+        return Cost;
+      }
+
+      // Wide loads.
+      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
+                                   LI->getPointerAddressSpace());
+    }
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+    case Instruction::Trunc:
+    case Instruction::FPTrunc:
+    case Instruction::BitCast: {
+      Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+      return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+    }
+    default: {
+      // We are scalarizing the instruction. Return the cost of the scalar
+      // instruction, plus the cost of insert and extract into vector
+      // elements, times the vector width.
+      unsigned Cost = 0;
+
+      bool IsVoid = RetTy->isVoidTy();
+
+      unsigned InsCost = (IsVoid ? 0 :
+                          VTTI->getInstrCost(Instruction::InsertElement,
+                                             VectorTy));
+
+      unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+                                            VectorTy);
+
+      // The cost of inserting the results plus extracting each one of the
+      // operands.
+      Cost += VF * (InsCost + ExtCost * I->getNumOperands());
+
+      // The cost of executing VF copies of the scalar instruction.
+      Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
+      return Cost;
+    }
+  }// end of switch.
+}
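For the default (scalarized) case above, the estimate is VF copies of the scalar instruction plus the insert/extract traffic for its operands and result. A worked computation under assumed unit costs (illustrative only; real numbers come from the target's VectorTargetTransformInfo):

#include <cassert>

// Assumed unit costs, purely for the example.
static const unsigned InsCost = 1;      // inserting a result element
static const unsigned ExtCost = 1;      // extracting an operand element
static const unsigned ScalarOpCost = 2; // one scalar copy of the instruction

// Cost of scalarizing an instruction with NumOperands operands at width VF.
static unsigned scalarizationCost(unsigned VF, unsigned NumOperands) {
  unsigned Cost = 0;
  Cost += VF * (InsCost + ExtCost * NumOperands); // re-packing the lanes
  Cost += VF * ScalarOpCost;                      // VF scalar copies of the op
  return Cost;
}

int main() {
  // A two-operand instruction scalarized at VF = 4: 4*(1 + 2) + 4*2 = 20.
  assert(scalarizationCost(4, 2) == 20);
  return 0;
}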
+
+Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
+  if (Scalar->isVoidTy() || VF == 1)
+    return Scalar;
+  return VectorType::get(Scalar, VF);
+}
+
 } // namespace
 
 char LoopVectorize::ID = 0;

Modified: llvm/branches/R600/lib/VMCore/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/AsmWriter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/AsmWriter.cpp (original)
+++ llvm/branches/R600/lib/VMCore/AsmWriter.cpp Tue Nov 13 09:21:47 2012
@@ -74,6 +74,7 @@
     case CallingConv::X86_StdCall:  Out << "x86_stdcallcc"; break;
     case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
     case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+    case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
     case CallingConv::ARM_APCS:     Out << "arm_apcscc"; break;
     case CallingConv::ARM_AAPCS:    Out << "arm_aapcscc"; break;
     case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc"; break;

Modified: llvm/branches/R600/lib/VMCore/Attributes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Attributes.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Attributes.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Attributes.cpp Tue Nov 13 09:21:47 2012
@@ -127,7 +127,7 @@
   uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
   if (Attrs.hasAttribute(Attributes::Alignment))
     EncodedAttrs |= Attrs.getAlignment() << 16;
-  EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11;
+  EncodedAttrs |= (Attrs.Raw() & (0xffffULL << 21)) << 11;
   return EncodedAttrs;
 }
 
@@ -145,7 +145,7 @@
   AttrBuilder B(EncodedAttrs & 0xffff);
   if (Alignment)
     B.addAlignmentAttr(Alignment);
-  B.addRawValue((EncodedAttrs & (0xfffULL << 32)) >> 11);
+  B.addRawValue((EncodedAttrs & (0xffffULL << 32)) >> 11);
   return Attributes::get(C, B);
 }
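Reading the two masks above together, the encoding appears to keep the low attribute bits in bits 0-15, the alignment field starting at bit 16, and the attribute bits from bit 21 upward shifted by 11 into bit 32 and above; widening 0xfff to 0xffff makes room for the new MinSize bit introduced below. A small round-trip check with plain integers (an inference from the visible masks, not a statement about the full Attributes layout):

#include <cassert>
#include <cstdint>

// Round-trip of the widened encoding using plain integers; the shifts and
// masks follow the patch above.
static uint64_t encode(uint64_t Raw, uint64_t Alignment) {
  uint64_t Encoded = Raw & 0xffff;             // low attribute bits
  Encoded |= Alignment << 16;                  // alignment field
  Encoded |= (Raw & (0xffffULL << 21)) << 11;  // high bits move to bit 32+
  return Encoded;
}

static uint64_t decodeHighBits(uint64_t Encoded) {
  return (Encoded & (0xffffULL << 32)) >> 11;  // back down to bit 21+
}

int main() {
  uint64_t MinSize = 1ULL << 33;               // the newly added attribute bit
  uint64_t Encoded = encode(MinSize, 0);
  // With the old 12-bit (0xfff) masks this bit would have been dropped.
  assert(decodeHighBits(Encoded) == MinSize);
  return 0;
}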
 
@@ -201,6 +201,8 @@
     Result += "nonlazybind ";
   if (hasAttribute(Attributes::AddressSafety))
     Result += "address_safety ";
+  if (hasAttribute(Attributes::MinSize))
+    Result += "minsize ";
   if (hasAttribute(Attributes::StackAlignment)) {
     Result += "alignstack(";
     Result += utostr(getStackAlignment());
@@ -324,6 +326,7 @@
   case Attributes::UWTable:         return 1 << 30;
   case Attributes::NonLazyBind:     return 1U << 31;
   case Attributes::AddressSafety:   return 1ULL << 32;
+  case Attributes::MinSize:         return 1ULL << 33;
   }
   llvm_unreachable("Unsupported attribute type");
 }

Modified: llvm/branches/R600/lib/VMCore/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/CMakeLists.txt (original)
+++ llvm/branches/R600/lib/VMCore/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,5 +1,3 @@
-set(LLVM_REQUIRES_RTTI 1)
-
 add_llvm_library(LLVMCore
   AsmWriter.cpp
   Attributes.cpp

Modified: llvm/branches/R600/lib/VMCore/ConstantFold.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/ConstantFold.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/ConstantFold.cpp (original)
+++ llvm/branches/R600/lib/VMCore/ConstantFold.cpp Tue Nov 13 09:21:47 2012
@@ -87,9 +87,13 @@
   Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
   Instruction::CastOps secondOp = Instruction::CastOps(opc);
 
+  // Assume that pointers are never more than 64 bits wide.
+  IntegerType *FakeIntPtrTy = Type::getInt64Ty(DstTy->getContext());
+
   // Let CastInst::isEliminableCastPair do the heavy lifting.
   return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
-                                        Type::getInt64Ty(DstTy->getContext()));
+                                        FakeIntPtrTy, FakeIntPtrTy,
+                                        FakeIntPtrTy);
 }
 
 static Constant *FoldBitCast(Constant *V, Type *DestTy) {
@@ -514,10 +518,6 @@
     return UndefValue::get(DestTy);
   }
 
-  // No compile-time operations on this type yet.
-  if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty())
-    return 0;
-
   if (V->isNullValue() && !DestTy->isX86_MMXTy())
     return Constant::getNullValue(DestTy);
 
@@ -576,6 +576,7 @@
                   DestTy->isDoubleTy() ? APFloat::IEEEdouble :
                   DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
                   DestTy->isFP128Ty() ? APFloat::IEEEquad :
+                  DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble :
                   APFloat::Bogus,
                   APFloat::rmNearestTiesToEven, &ignored);
       return ConstantFP::get(V->getContext(), Val);
@@ -646,7 +647,8 @@
   case Instruction::SIToFP:
     if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
       APInt api = CI->getValue();
-      APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), true);
+      APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()),
+                  !DestTy->isPPC_FP128Ty() /* isIEEE */);
       (void)apf.convertFromAPInt(api, 
                                  opc==Instruction::SIToFP,
                                  APFloat::rmNearestTiesToEven);
@@ -867,10 +869,6 @@
 
 Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
                                               Constant *C1, Constant *C2) {
-  // No compile-time operations on this type yet.
-  if (C1->getType()->isPPC_FP128Ty())
-    return 0;
-
   // Handle UndefValue up front.
   if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
     switch (Opcode) {
@@ -1273,10 +1271,6 @@
   assert(V1->getType() == V2->getType() &&
          "Cannot compare values of different types!");
 
-  // No compile-time operations on this type yet.
-  if (V1->getType()->isPPC_FP128Ty())
-    return FCmpInst::BAD_FCMP_PREDICATE;
-
   // Handle degenerate case quickly
   if (V1 == V2) return FCmpInst::FCMP_OEQ;
 
@@ -1602,10 +1596,6 @@
     return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
   }
 
-  // No compile-time operations on this type yet.
-  if (C1->getType()->isPPC_FP128Ty())
-    return 0;
-
   // icmp eq/ne(null,GV) -> false/true
   if (C1->isNullValue()) {
     if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2))

Modified: llvm/branches/R600/lib/VMCore/DIBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/DIBuilder.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/DIBuilder.cpp (original)
+++ llvm/branches/R600/lib/VMCore/DIBuilder.cpp Tue Nov 13 09:21:47 2012
@@ -492,7 +492,8 @@
     NULL,
     Elements,
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
-    Constant::getNullValue(Type::getInt32Ty(VMContext))
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
   };
   return DIType(MDNode::get(VMContext, Elts));
 }

Modified: llvm/branches/R600/lib/VMCore/DataLayout.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/DataLayout.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/DataLayout.cpp (original)
+++ llvm/branches/R600/lib/VMCore/DataLayout.cpp Tue Nov 13 09:21:47 2012
@@ -660,13 +660,25 @@
   return Log2_32(Align);
 }
 
-/// getIntPtrType - Return an unsigned integer type that is the same size or
-/// greater to the host pointer size.
+/// getIntPtrType - Return an integer type with size at least as big as that
+/// of a pointer in the given address space.
 IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
                                        unsigned AddressSpace) const {
   return IntegerType::get(C, getPointerSizeInBits(AddressSpace));
 }
 
+/// getIntPtrType - Return an integer (vector of integer) type with size at
+/// least as big as that of a pointer of the given pointer (vector of pointer)
+/// type.
+Type *DataLayout::getIntPtrType(Type *Ty) const {
+  assert(Ty->isPtrOrPtrVectorTy() &&
+         "Expected a pointer or pointer vector type.");
+  unsigned NumBits = getTypeSizeInBits(Ty->getScalarType());
+  IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
+  if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+    return VectorType::get(IntTy, VecTy->getNumElements());
+  return IntTy;
+}
 
 uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
                                       ArrayRef<Value *> Indices) const {

Modified: llvm/branches/R600/lib/VMCore/Instructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Instructions.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Instructions.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Instructions.cpp Tue Nov 13 09:21:47 2012
@@ -1399,18 +1399,6 @@
   return getIndexedTypeInternal(Ptr, IdxList);
 }
 
-unsigned GetElementPtrInst::getAddressSpace(Value *Ptr) {
-  Type *Ty = Ptr->getType();
-
-  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
-    Ty = VTy->getElementType();
-
-  if (PointerType *PTy = dyn_cast<PointerType>(Ty))
-    return PTy->getAddressSpace();
-
-  llvm_unreachable("Invalid GEP pointer type");
-}
-
 /// hasAllZeroIndices - Return true if all of the indices of this GEP are
 /// zeros.  If so, the result pointer and the first operand have the same
 /// value, just potentially different types.
@@ -2130,7 +2118,8 @@
 /// If no such cast is permitted, the function returns 0.
 unsigned CastInst::isEliminableCastPair(
   Instruction::CastOps firstOp, Instruction::CastOps secondOp,
-  Type *SrcTy, Type *MidTy, Type *DstTy, Type *IntPtrTy) {
+  Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy,
+  Type *DstIntPtrTy) {
   // Define the 144 possibilities for these two cast instructions. The values
   // in this matrix determine what to do in a given situation and select the
   // case in the switch below.  The rows correspond to firstOp, the columns 
@@ -2233,9 +2222,9 @@
       return 0;
     case 7: { 
       // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
-      if (!IntPtrTy)
+      if (!SrcIntPtrTy || DstIntPtrTy != SrcIntPtrTy)
         return 0;
-      unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+      unsigned PtrSize = SrcIntPtrTy->getScalarSizeInBits();
       unsigned MidSize = MidTy->getScalarSizeInBits();
       if (MidSize >= PtrSize)
         return Instruction::BitCast;
@@ -2274,9 +2263,9 @@
       return 0;
     case 13: {
       // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
-      if (!IntPtrTy)
+      if (!MidIntPtrTy)
         return 0;
-      unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+      unsigned PtrSize = MidIntPtrTy->getScalarSizeInBits();
       unsigned SrcSize = SrcTy->getScalarSizeInBits();
       unsigned DstSize = DstTy->getScalarSizeInBits();
       if (SrcSize <= PtrSize && SrcSize == DstSize)

Modified: llvm/branches/R600/lib/VMCore/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Makefile?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Makefile (original)
+++ llvm/branches/R600/lib/VMCore/Makefile Tue Nov 13 09:21:47 2012
@@ -9,7 +9,6 @@
 LEVEL = ../..
 LIBRARYNAME = LLVMCore
 BUILD_ARCHIVE = 1
-REQUIRES_RTTI = 1
 
 BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen
 

Modified: llvm/branches/R600/lib/VMCore/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/TargetTransformInfo.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/TargetTransformInfo.cpp (original)
+++ llvm/branches/R600/lib/VMCore/TargetTransformInfo.cpp Tue Nov 13 09:21:47 2012
@@ -17,11 +17,15 @@
 /// @note This has to exist, because this is a pass, but it should never be
 /// used.
 TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) {
+  /// You are seeing this error because your pass required the TTI
+  /// using a call to "getAnalysis<TargetTransformInfo>()", and you did
+  /// not initialize a machine target which can provide the TTI.
+  /// You should use "getAnalysisIfAvailable<TargetTransformInfo>()" instead.
   report_fatal_error("Bad TargetTransformInfo ctor used.  "
                      "Tool did not specify a TargetTransformInfo to use?");
 }
 
-INITIALIZE_PASS(TargetTransformInfo, "TargetTransformInfo",
+INITIALIZE_PASS(TargetTransformInfo, "targettransforminfo",
                 "Target Transform Info", false, true)
 char TargetTransformInfo::ID = 0;
 

Modified: llvm/branches/R600/lib/VMCore/Type.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Type.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Type.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Type.cpp Tue Nov 13 09:21:47 2012
@@ -47,35 +47,17 @@
   return this;
 }
 
+const Type *Type::getScalarType() const {
+  if (const VectorType *VTy = dyn_cast<VectorType>(this))
+    return VTy->getElementType();
+  return this;
+}
+
 /// isIntegerTy - Return true if this is an IntegerType of the specified width.
 bool Type::isIntegerTy(unsigned Bitwidth) const {
   return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
 }
 
-/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of
-/// integer types.
-///
-bool Type::isIntOrIntVectorTy() const {
-  if (isIntegerTy())
-    return true;
-  if (getTypeID() != Type::VectorTyID) return false;
-  
-  return cast<VectorType>(this)->getElementType()->isIntegerTy();
-}
-
-/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types.
-///
-bool Type::isFPOrFPVectorTy() const {
-  if (getTypeID() == Type::HalfTyID || getTypeID() == Type::FloatTyID ||
-      getTypeID() == Type::DoubleTyID ||
-      getTypeID() == Type::FP128TyID || getTypeID() == Type::X86_FP80TyID || 
-      getTypeID() == Type::PPC_FP128TyID)
-    return true;
-  if (getTypeID() != Type::VectorTyID) return false;
-  
-  return cast<VectorType>(this)->getElementType()->isFloatingPointTy();
-}
-
 // canLosslesslyBitCastTo - Return true if this type can be converted to
 // 'Ty' without any reinterpretation of bits.  For example, i8* to i32*.
 //
@@ -233,7 +215,7 @@
 }
 
 unsigned Type::getPointerAddressSpace() const {
-  return cast<PointerType>(this)->getAddressSpace();
+  return cast<PointerType>(getScalarType())->getAddressSpace();
 }
 
 

Modified: llvm/branches/R600/lib/VMCore/User.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/User.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/User.cpp (original)
+++ llvm/branches/R600/lib/VMCore/User.cpp Tue Nov 13 09:21:47 2012
@@ -10,6 +10,7 @@
 #include "llvm/Constant.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/User.h"
+#include "llvm/Operator.h"
 
 namespace llvm {
 
@@ -78,4 +79,12 @@
   ::operator delete(Storage);
 }
 
+//===----------------------------------------------------------------------===//
+//                             Operator Class
+//===----------------------------------------------------------------------===//
+
+Operator::~Operator() {
+  llvm_unreachable("should never destroy an Operator");
+}
+
 } // End llvm namespace

Modified: llvm/branches/R600/lib/VMCore/Verifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Verifier.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Verifier.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Verifier.cpp Tue Nov 13 09:21:47 2012
@@ -705,6 +705,7 @@
   case CallingConv::Cold:
   case CallingConv::X86_FastCall:
   case CallingConv::X86_ThisCall:
+  case CallingConv::Intel_OCL_BI:
   case CallingConv::PTX_Kernel:
   case CallingConv::PTX_Device:
     Assert1(!F.isVarArg(),

Modified: llvm/branches/R600/projects/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/projects/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/projects/CMakeLists.txt (original)
+++ llvm/branches/R600/projects/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -14,8 +14,6 @@
 # Also add in the compiler-rt tree if present and we have a sufficiently
 # recent version of CMake.
 if(${CMAKE_VERSION} VERSION_GREATER 2.8.7 AND
-   ${LLVM_BUILD_RUNTIME} AND
-   IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt AND
-   EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt/CMakeLists.txt)
-  add_subdirectory(compiler-rt)
+   ${LLVM_BUILD_RUNTIME})
+  add_llvm_external_project(compiler-rt)
 endif()

Modified: llvm/branches/R600/projects/sample/autoconf/configure.ac
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/projects/sample/autoconf/configure.ac?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/projects/sample/autoconf/configure.ac (original)
+++ llvm/branches/R600/projects/sample/autoconf/configure.ac Tue Nov 13 09:21:47 2012
@@ -304,8 +304,8 @@
   sparc*-*)               llvm_cv_target_arch="Sparc" ;;
   powerpc*-*)             llvm_cv_target_arch="PowerPC" ;;
   arm*-*)                 llvm_cv_target_arch="ARM" ;;
-  mips-*)                 llvm_cv_target_arch="Mips" ;;
-  mipsel-*)               llvm_cv_target_arch="Mips" ;;
+  mips-* | mips64-*)      llvm_cv_target_arch="Mips" ;;
+  mipsel-* | mips64el-*)  llvm_cv_target_arch="Mips" ;;
   xcore-*)                llvm_cv_target_arch="XCore" ;;
   msp430-*)               llvm_cv_target_arch="MSP430" ;;
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;

Modified: llvm/branches/R600/projects/sample/configure
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/projects/sample/configure?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/projects/sample/configure (original)
+++ llvm/branches/R600/projects/sample/configure Tue Nov 13 09:21:47 2012
@@ -3840,8 +3840,8 @@
   sparc*-*)               llvm_cv_target_arch="Sparc" ;;
   powerpc*-*)             llvm_cv_target_arch="PowerPC" ;;
   arm*-*)                 llvm_cv_target_arch="ARM" ;;
-  mips-*)                 llvm_cv_target_arch="Mips" ;;
-  mipsel-*)               llvm_cv_target_arch="Mips" ;;
+  mips-* | mips64-*)      llvm_cv_target_arch="Mips" ;;
+  mipsel-* | mips64el-*)  llvm_cv_target_arch="Mips" ;;
   xcore-*)                llvm_cv_target_arch="XCore" ;;
   msp430-*)               llvm_cv_target_arch="MSP430" ;;
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;

Modified: llvm/branches/R600/runtime/libprofile/CommonProfiling.c
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/runtime/libprofile/CommonProfiling.c?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/runtime/libprofile/CommonProfiling.c (original)
+++ llvm/branches/R600/runtime/libprofile/CommonProfiling.c Tue Nov 13 09:21:47 2012
@@ -28,14 +28,35 @@
 
 static char *SavedArgs = 0;
 static unsigned SavedArgsLength = 0;
+static const char *SavedEnvVar = 0;
 
 static const char *OutputFilename = "llvmprof.out";
 
+/* check_environment_variable - Check to see if the LLVMPROF_OUTPUT environment
+ * variable is set.  If it is then save it and set OutputFilename.
+ */
+static void check_environment_variable(void) {
+  const char *EnvVar;
+  if (SavedEnvVar) return; /* Guarantee that we can't leak memory. */
+
+  if ((EnvVar = getenv("LLVMPROF_OUTPUT")) != NULL) {
+    /* The string that getenv returns is allowed to be statically allocated,
+     * which means it may be changed by future calls to getenv, so copy it.
+     */
+    SavedEnvVar = strdup(EnvVar);
+    OutputFilename = SavedEnvVar;
+  }
+}
+
 /* save_arguments - Save argc and argv as passed into the program for the file
  * we output.
+ * If either the LLVMPROF_OUTPUT environment variable or the -llvmprof-output
+ * command line argument is set, then change OutputFilename to the provided
+ * value.  The command line argument value overrides the environment variable.
  */
 int save_arguments(int argc, const char **argv) {
   unsigned Length, i;
+  if (!SavedEnvVar && !SavedArgs) check_environment_variable();
   if (SavedArgs || !argv) return argc;  /* This can be called multiple times */
 
   /* Check to see if there are any arguments passed into the program for the
@@ -54,6 +75,7 @@
         puts("-llvmprof-output requires a filename argument!");
       else {
         OutputFilename = strdup(argv[1]);
+        if (SavedEnvVar) { free((void *)SavedEnvVar); SavedEnvVar = 0; }
         memmove((char**)&argv[1], &argv[2], (argc-1)*sizeof(char*));
         --argc;
       }

Modified: llvm/branches/R600/test/Analysis/BasicAA/nocapture.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/BasicAA/nocapture.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Analysis/BasicAA/nocapture.ll (original)
+++ llvm/branches/R600/test/Analysis/BasicAA/nocapture.ll Tue Nov 13 09:21:47 2012
@@ -13,3 +13,24 @@
        ret i32 %c
 }
 
+declare void @test3(i32** %p, i32* %q) nounwind
+
+define i32 @test4(i32* noalias nocapture %p) nounwind {
+; CHECK: call void @test3
+; CHECK: store i32 0, i32* %p
+; CHECK: store i32 1, i32* %x
+; CHECK: %y = load i32* %p
+; CHECK: ret i32 %y
+entry:
+       %q = alloca i32*
+       ; Here test3 might store %p to %q. This doesn't violate %p's nocapture
+       ; attribute since the copy doesn't outlive the function.
+       call void @test3(i32** %q, i32* %p) nounwind
+       store i32 0, i32* %p
+       %x = load i32** %q
+       ; This store might write to %p and so we can't eliminate the subsequent
+       ; load
+       store i32 1, i32* %x
+       %y = load i32* %p
+       ret i32 %y
+}

Added: llvm/branches/R600/test/Analysis/CostModel/X86/arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/arith.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/arith.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/arith.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,42 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @add(i32 %arg) {
+  ;CHECK: cost of 1 {{.*}} add
+  %A = add <4 x i32> undef, undef
+  ;CHECK: cost of 4 {{.*}} add
+  %B = add <8 x i32> undef, undef
+  ;CHECK: cost of 1 {{.*}} add
+  %C = add <2 x i64> undef, undef
+  ;CHECK: cost of 4 {{.*}} add
+  %D = add <4 x i64> undef, undef
+  ;CHECK: cost of 8 {{.*}} add
+  %E = add <8 x i64> undef, undef
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+
+define i32 @xor(i32 %arg) {
+  ;CHECK: cost of 1 {{.*}} xor
+  %A = xor <4 x i32> undef, undef
+  ;CHECK: cost of 1 {{.*}} xor
+  %B = xor <8 x i32> undef, undef
+  ;CHECK: cost of 1 {{.*}} xor
+  %C = xor <2 x i64> undef, undef
+  ;CHECK: cost of 1 {{.*}} xor
+  %D = xor <4 x i64> undef, undef
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+
+define i32 @fmul(i32 %arg) {
+  ;CHECK: cost of 1 {{.*}} fmul
+  %A = fmul <4 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fmul
+  %B = fmul <8 x float> undef, undef
+  ret i32 undef
+}

Added: llvm/branches/R600/test/Analysis/CostModel/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/cast.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/cast.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/cast.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,69 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @add(i32 %arg) {
+
+  ; -- Same size registers --
+  ;CHECK: cost of 1 {{.*}} zext
+  %A = zext <4 x i1> undef to <4 x i32>
+  ;CHECK: cost of 2 {{.*}} sext
+  %B = sext <4 x i1> undef to <4 x i32>
+  ;CHECK: cost of 0 {{.*}} trunc
+  %C = trunc <4 x i32> undef to <4 x i1>
+
+  ; -- Different size registers --
+  ;CHECK-NOT: cost of 1 {{.*}} zext
+  %D = zext <8 x i1> undef to <8 x i32>
+  ;CHECK-NOT: cost of 2 {{.*}} sext
+  %E = sext <8 x i1> undef to <8 x i32>
+  ;CHECK-NOT: cost of 2 {{.*}} trunc
+  %F = trunc <8 x i32> undef to <8 x i1>
+
+  ; -- scalars --
+
+  ;CHECK: cost of 1 {{.*}} zext
+  %G = zext i1 undef to i32
+  ;CHECK: cost of 0 {{.*}} trunc
+  %H = trunc i32 undef to i1
+
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+define i32 @zext_sext(<8 x i1> %in) {
+  ;CHECK: cost of 6 {{.*}} zext
+  %Z = zext <8 x i1> %in to <8 x i32>
+  ;CHECK: cost of 9 {{.*}} sext
+  %S = sext <8 x i1> %in to <8 x i32>
+
+  ;CHECK: cost of 1 {{.*}} sext
+  %A = sext <8 x i16> undef to <8 x i32>
+  ;CHECK: cost of 1 {{.*}} zext
+  %B = zext <8 x i16> undef to <8 x i32>
+  ;CHECK: cost of 1 {{.*}} sext
+  %C = sext <4 x i32> undef to <4 x i64>
+
+  ;CHECK: cost of 1 {{.*}} zext
+  %D = zext <4 x i32> undef to <4 x i64>
+  ;CHECK: cost of 1 {{.*}} trunc
+
+  %E = trunc <4 x i64> undef to <4 x i32>
+  ;CHECK: cost of 1 {{.*}} trunc
+  %F = trunc <8 x i32> undef to <8 x i16>
+
+  ;CHECK: cost of 3 {{.*}} trunc
+  %G = trunc <8 x i64> undef to <8 x i32>
+
+  ret i32 undef
+}
+
+define i32 @masks(<8 x i1> %in) {
+  ;CHECK: cost of 6 {{.*}} zext
+  %Z = zext <8 x i1> %in to <8 x i32>
+  ;CHECK: cost of 9 {{.*}} sext
+  %S = sext <8 x i1> %in to <8 x i32>
+  ret i32 undef
+}
+

Added: llvm/branches/R600/test/Analysis/CostModel/X86/cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/cmp.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/cmp.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/cmp.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,42 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @cmp(i32 %arg) {
+  ;  -- floats --
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %A = fcmp olt <2 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %B = fcmp olt <4 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %C = fcmp olt <8 x float> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %D = fcmp olt <2 x double> undef, undef
+  ;CHECK: cost of 1 {{.*}} fcmp
+  %E = fcmp olt <4 x double> undef, undef
+
+  ;  -- integers --
+
+  ;CHECK: cost of 1 {{.*}} icmp
+  %F = icmp eq <16 x i8> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %G = icmp eq <8 x i16> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %H = icmp eq <4 x i32> undef, undef
+  ;CHECK: cost of 1 {{.*}} icmp
+  %I = icmp eq <2 x i64> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %J = icmp eq <4 x i64> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %K = icmp eq <8 x i32> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %L = icmp eq <16 x i16> undef, undef
+  ;CHECK: cost of 4 {{.*}} icmp
+  %M = icmp eq <32 x i8> undef, undef
+
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+

Added: llvm/branches/R600/test/Analysis/CostModel/X86/i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/i32.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/i32.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/i32.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,9 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s
+
+
+;CHECK: cost of 2 {{.*}} add
+;CHECK: cost of 1 {{.*}} ret
+define i32 @no_info(i32 %arg) {
+  %e = add i64 undef, undef
+  ret i32 undef
+}

Added: llvm/branches/R600/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/insert-extract-at-zero.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/insert-extract-at-zero.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/insert-extract-at-zero.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,40 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
+  ;CHECK: cost of 0 {{.*}} extract
+  %A = extractelement <4 x float> undef, i32 0
+  ;CHECK: cost of 1 {{.*}} extract
+  %B = extractelement <4 x i32> undef, i32 0
+  ;CHECK: cost of 1 {{.*}} extract
+  %C = extractelement <4 x float> undef, i32 1
+
+  ;CHECK: cost of 0 {{.*}} extract
+  %D = extractelement <8 x float> undef, i32 0
+  ;CHECK: cost of 1 {{.*}} extract
+  %E = extractelement <8 x float> undef, i32 1
+
+  ;CHECK: cost of 1 {{.*}} extract
+  %F = extractelement <8 x float> undef, i32 %arg
+
+  ;CHECK: cost of 0 {{.*}} insert
+  %G = insertelement <4 x float> undef, float %fl, i32 0
+  ;CHECK: cost of 1 {{.*}} insert
+  %H = insertelement <4 x float> undef, float %fl, i32 1
+  ;CHECK: cost of 1 {{.*}} insert
+  %I = insertelement <4 x i32> undef, i32 %arg, i32 0
+
+  ;CHECK: cost of 0 {{.*}} insert
+  %J = insertelement <4 x double> undef, double undef, i32 0
+
+  ;CHECK: cost of 0 {{.*}} insert
+  %K = insertelement <8 x double> undef, double undef, i32 4
+  ;CHECK: cost of 0 {{.*}} insert
+  %L = insertelement <16 x double> undef, double undef, i32 8
+  ;CHECK: cost of 1 {{.*}} insert
+  %M = insertelement <16 x double> undef, double undef, i32 9
+  ret i32 0
+}
+

Added: llvm/branches/R600/test/Analysis/CostModel/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/lit.local.cfg?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/lit.local.cfg (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/lit.local.cfg Tue Nov 13 09:21:47 2012
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+

Added: llvm/branches/R600/test/Analysis/CostModel/X86/loop_v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/loop_v2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/loop_v2.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/loop_v2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,43 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define i32 @foo(i32* nocapture %A) nounwind uwtable readonly ssp {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
+  %0 = getelementptr inbounds i32* %A, i64 %index
+  %1 = bitcast i32* %0 to <2 x i32>*
+  %2 = load <2 x i32>* %1, align 4
+  %3 = sext <2 x i32> %2 to <2 x i64>
+  ;CHECK: cost of 1 {{.*}} extract
+  %4 = extractelement <2 x i64> %3, i32 0
+  %5 = getelementptr inbounds i32* %A, i64 %4
+  ;CHECK: cost of 1 {{.*}} extract
+  %6 = extractelement <2 x i64> %3, i32 1
+  %7 = getelementptr inbounds i32* %A, i64 %6
+  %8 = load i32* %5, align 4, !tbaa !0
+  ;CHECK: cost of 1 {{.*}} insert
+  %9 = insertelement <2 x i32> undef, i32 %8, i32 0
+  %10 = load i32* %7, align 4, !tbaa !0
+  ;CHECK: cost of 1 {{.*}} insert
+  %11 = insertelement <2 x i32> %9, i32 %10, i32 1
+  %12 = add nsw <2 x i32> %11, %vec.phi
+  %index.next = add i64 %index, 2
+  %13 = icmp eq i64 %index.next, 192
+  br i1 %13, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  %14 = extractelement <2 x i32> %12, i32 0
+  %15 = extractelement <2 x i32> %12, i32 1
+  %16 = add i32 %14, %15
+  ret i32 %16
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}

Added: llvm/branches/R600/test/Analysis/CostModel/X86/tiny.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/tiny.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/tiny.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/tiny.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,11 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: cost of 1 {{.*}} add
+;CHECK: cost of 1 {{.*}} ret
+define i32 @no_info(i32 %arg) {
+  %e = add i32 %arg, %arg
+  ret i32 %e
+}

Added: llvm/branches/R600/test/Analysis/CostModel/X86/vectorized-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/X86/vectorized-loop.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/X86/vectorized-loop.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/X86/vectorized-loop.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,78 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @foo(i32* noalias nocapture %A, i32* noalias nocapture %B, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+  ;CHECK: cost of 1 {{.*}} icmp
+  %cmp7 = icmp slt i32 %start, %end
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  ;CHECK: cost of 1 {{.*}} sext
+  %0 = sext i32 %start to i64
+  %1 = sub i32 %end, %start
+  %2 = zext i32 %1 to i64
+  %end.idx = add i64 %2, %0
+  ;CHECK: cost of 1 {{.*}} add
+  %n.vec = and i64 %2, 4294967288
+  %end.idx.rnd.down = add i64 %n.vec, %0
+  ;CHECK: cost of 1 {{.*}} icmp
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %middle.block, label %vector.body
+
+vector.body:                                      ; preds = %for.body.lr.ph, %vector.body
+  %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body.lr.ph ]
+  %3 = add i64 %index, 2
+  %4 = getelementptr inbounds i32* %B, i64 %3
+  ;CHECK: cost of 0 {{.*}} bitcast
+  %5 = bitcast i32* %4 to <8 x i32>*
+  ;CHECK: cost of 1 {{.*}} load
+  %6 = load <8 x i32>* %5, align 4
+  ;CHECK: cost of 4 {{.*}} mul
+  %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  %8 = getelementptr inbounds i32* %A, i64 %index
+  %9 = bitcast i32* %8 to <8 x i32>*
+  %10 = load <8 x i32>* %9, align 4
+  ;CHECK: cost of 4 {{.*}} add
+  %11 = add nsw <8 x i32> %10, %7
+  ;CHECK: cost of 1 {{.*}} store
+  store <8 x i32> %11, <8 x i32>* %9, align 4
+  %index.next = add i64 %index, 8
+  %12 = icmp eq i64 %index.next, %end.idx.rnd.down
+  ;CHECK: cost of 1 {{.*}} br
+  br i1 %12, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body, %for.body.lr.ph
+  %cmp.n = icmp eq i64 %end.idx, %end.idx.rnd.down
+  br i1 %cmp.n, label %for.end, label %for.body
+
+for.body:                                         ; preds = %middle.block, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %end.idx.rnd.down, %middle.block ]
+  %13 = add nsw i64 %indvars.iv, 2
+  %arrayidx = getelementptr inbounds i32* %B, i64 %13
+  ;CHECK: cost of 1 {{.*}} load
+  %14 = load i32* %arrayidx, align 4, !tbaa !0
+  ;CHECK: cost of 1 {{.*}} mul
+  %mul = mul nsw i32 %14, 5
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  ;CHECK: cost of 1 {{.*}} load
+  %15 = load i32* %arrayidx2, align 4, !tbaa !0
+  %add3 = add nsw i32 %15, %mul
+  store i32 %add3, i32* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  ;CHECK: cost of 0 {{.*}} trunc
+  %16 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %16, %end
+  ;CHECK: cost of 1 {{.*}} br
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %middle.block, %for.body, %entry
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}

Copied: llvm/branches/R600/test/Analysis/CostModel/lit.local.cfg (from r167837, llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/lit.local.cfg)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/lit.local.cfg?p2=llvm/branches/R600/test/Analysis/CostModel/lit.local.cfg&p1=llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/lit.local.cfg&r1=167837&r2=167838&rev=167838&view=diff
==============================================================================
    (empty)

Added: llvm/branches/R600/test/Analysis/CostModel/no_info.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CostModel/no_info.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CostModel/no_info.ll (added)
+++ llvm/branches/R600/test/Analysis/CostModel/no_info.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,15 @@
+; RUN: opt < %s -cost-model -analyze | FileCheck %s
+
+; The cost model does not have any target information so it can't make a decision.
+; Notice that OPT does not read the triple information from the module itself, only through the command line.
+
+; This info ignored:
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: Unknown cost {{.*}} add
+;CHECK: Unknown cost {{.*}} ret
+define i32 @no_info(i32 %arg) {
+  %e = add i32 %arg, %arg
+  ret i32 %e
+}

Removed: llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/alias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/alias.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/alias.ll (original)
+++ llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/alias.ll (removed)
@@ -1,44 +0,0 @@
-; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
-
-;; x[5] = x[6] // with x being a pointer passed as argument
-
-define void @f1(i32* nocapture %xptr) nounwind {
-entry:
-  %x.ld.addr = getelementptr i32* %xptr, i64 6
-  %x.st.addr = getelementptr i32* %xptr, i64 5
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %x = load i32* %x.ld.addr
-  store i32 %x, i32* %x.st.addr
-; CHECK: 0,1: dep
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; x[5] = x[6] // with x being an array on the stack
-
-define void @foo(...) nounwind {
-entry:
-  %xptr = alloca [256 x i32], align 4
-  %x.ld.addr = getelementptr [256 x i32]* %xptr, i64 0, i64 6
-  %x.st.addr = getelementptr [256 x i32]* %xptr, i64 0, i64 5
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %x = load i32* %x.ld.addr
-  store i32 %x, i32* %x.st.addr
-; CHECK: 0,1: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}

Removed: llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/lit.local.cfg?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/lit.local.cfg (original)
+++ llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/lit.local.cfg (removed)
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']

Removed: llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-strong.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-strong.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-strong.ll (original)
+++ llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-strong.ll (removed)
@@ -1,110 +0,0 @@
-; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
-
-@x = common global [256 x i32] zeroinitializer, align 4
-@y = common global [256 x i32] zeroinitializer, align 4
-
-;; for (i = 0; i < 256; i++)
-;;   x[i] = x[i] + y[i]
-
-define void @f1(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %x = load i32* %x.addr      ; 0
-  %y = load i32* %y.addr      ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.addr  ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; for (i = 0; i < 256; i++)
-;;   x[i+1] = x[i] + y[i]
-
-define void @f2(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %i.next = add i64 %i, 1
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.next
-  %x = load i32* %x.ld.addr     ; 0
-  %y = load i32* %y.ld.addr     ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.st.addr ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; for (i = 0; i < 10; i++)
-;;   x[i+20] = x[i] + y[i]
-
-define void @f3(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %i.20 = add i64 %i, 20
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.20
-  %x = load i32* %x.ld.addr     ; 0
-  %y = load i32* %y.ld.addr     ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.st.addr ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 10
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; for (i = 0; i < 10; i++)
-;;   x[10*i+1] = x[10*i] + y[i]
-
-define void @f4(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %i.10 = mul i64 %i, 10
-  %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i.10
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.10
-  %i.10.1 = add i64 %i.10, 1
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.10.1
-  %x = load i32* %x.ld.addr     ; 0
-  %y = load i32* %y.ld.addr     ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.st.addr ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 10
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}

Removed: llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll (original)
+++ llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll (removed)
@@ -1,118 +0,0 @@
-; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
-
-@x = common global [256 x i32] zeroinitializer, align 4
-@y = common global [256 x i32] zeroinitializer, align 4
-
-;; for (i = 0; i < 256; i++)
-;;   x[i] = x[255 - i] + y[i]
-
-define void @f1(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %i.255 = sub i64 255, %i
-  %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.255
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %x = load i32* %x.ld.addr     ; 0
-  %y = load i32* %y.ld.addr     ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.st.addr ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; for (i = 0; i < 100; i++)
-;;   x[i] = x[255 - i] + y[i]
-
-define void @f2(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %i.255 = sub i64 255, %i
-  %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.255
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %x = load i32* %x.ld.addr     ; 0
-  %y = load i32* %y.ld.addr     ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.st.addr ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 100
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; // the first iteration (i=0) leads to an out-of-bounds access of x. as the
-;; // result of this access is undefined, _any_ dependence result is safe.
-;; for (i = 0; i < 256; i++)
-;;   x[i] = x[256 - i] + y[i]
-
-define void @f3(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %i.256 = sub i64 0, %i
-  %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 1, i64 %i.256
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %x = load i32* %x.ld.addr     ; 0
-  %y = load i32* %y.ld.addr     ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.st.addr ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2:
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; // slightly contrived but valid IR for the following loop, where all
-;; // accesses in all iterations are within bounds. while this example's first
-;; // (ZIV-)subscript is (0, 1), accesses are dependent.
-;; for (i = 1; i < 256; i++)
-;;   x[i] = x[256 - i] + y[i]
-
-define void @f4(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %i.1 = add i64 1, %i
-  %i.256 = sub i64 -1, %i
-  %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i.1
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 1, i64 %i.256
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.1
-  %x = load i32* %x.ld.addr     ; 0
-  %y = load i32* %y.ld.addr     ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.st.addr ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}

Removed: llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll (original)
+++ llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll (removed)
@@ -1,56 +0,0 @@
-; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
-
-@x = common global [256 x i32] zeroinitializer, align 4
-@y = common global [256 x i32] zeroinitializer, align 4
-
-;; for (i = 0; i < 256; i++)
-;;   x[i] = x[42] + y[i]
-
-define void @f1(...) nounwind {
-entry:
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 42
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x = load i32* %x.ld.addr   ; 0
-  %y = load i32* %y.addr      ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.addr  ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; for (i = 0; i < 250; i++)
-;;   x[i] = x[255] + y[i]
-
-define void @f2(...) nounwind {
-entry:
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 255
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i
-  %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i
-  %x = load i32* %x.ld.addr   ; 0
-  %y = load i32* %y.addr      ; 1
-  %r = add i32 %y, %x
-  store i32 %r, i32* %x.addr  ; 2
-; CHECK: 0,2: dep
-; CHECK: 1,2: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 250
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}

Removed: llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/ziv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/ziv.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/ziv.ll (original)
+++ llvm/branches/R600/test/Analysis/LoopDependenceAnalysis/ziv.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
-
-@x = common global [256 x i32] zeroinitializer, align 4
-
-;; x[5] = x[6]
-
-define void @f1(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %x = load i32* getelementptr ([256 x i32]* @x, i32 0, i64 6)
-  store i32 %x, i32* getelementptr ([256 x i32]* @x, i32 0, i64 5)
-; CHECK: 0,1: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; x[c] = x[c+1] // with c being a loop-invariant constant
-
-define void @f2(i64 %c0) nounwind {
-entry:
-  %c1 = add i64 %c0, 1
-  %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %c0
-  %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %c1
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %x = load i32* %x.ld.addr
-  store i32 %x, i32* %x.st.addr
-; CHECK: 0,1: ind
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-;; x[6] = x[6]
-
-define void @f3(...) nounwind {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
-  %x = load i32* getelementptr ([256 x i32]* @x, i32 0, i64 6)
-  store i32 %x, i32* getelementptr ([256 x i32]* @x, i32 0, i64 6)
-; CHECK: 0,1: dep
-  %i.next = add i64 %i, 1
-  %exitcond = icmp eq i64 %i.next, 256
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}

Modified: llvm/branches/R600/test/BugPoint/crash-narrowfunctiontest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/BugPoint/crash-narrowfunctiontest.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/BugPoint/crash-narrowfunctiontest.ll (original)
+++ llvm/branches/R600/test/BugPoint/crash-narrowfunctiontest.ll Tue Nov 13 09:21:47 2012
@@ -2,7 +2,7 @@
 ;
 ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
 ; REQUIRES: loadable_module
-; XFAIL: lto
+; XFAIL: lto_on_osx
 
 define i32 @foo() { ret i32 1 }
 

Modified: llvm/branches/R600/test/BugPoint/metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/BugPoint/metadata.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/BugPoint/metadata.ll (original)
+++ llvm/branches/R600/test/BugPoint/metadata.ll Tue Nov 13 09:21:47 2012
@@ -1,7 +1,7 @@
 ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
 ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
 ; REQUIRES: loadable_module
-; XFAIL: lto
+; XFAIL: lto_on_osx
 
 ; Bugpoint should keep the call's metadata attached to the call.
 

Modified: llvm/branches/R600/test/BugPoint/remove_arguments_test.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/BugPoint/remove_arguments_test.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/BugPoint/remove_arguments_test.ll (original)
+++ llvm/branches/R600/test/BugPoint/remove_arguments_test.ll Tue Nov 13 09:21:47 2012
@@ -1,7 +1,7 @@
 ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes
 ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
 ; REQUIRES: loadable_module
-; XFAIL: lto
+; XFAIL: lto_on_osx
 
 ; Test to make sure that arguments are removed from the function if they are 
 ; unnecessary. And clean up any types that that frees up too.

Modified: llvm/branches/R600/test/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CMakeLists.txt (original)
+++ llvm/branches/R600/test/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -21,7 +21,12 @@
           llc lli llvm-ar llvm-as
           llvm-bcanalyzer llvm-diff
           llvm-dis llvm-extract llvm-dwarfdump
-          llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj
+          llvm-link
+          llvm-mc
+          llvm-mcmarkup
+          llvm-nm
+          llvm-objdump
+          llvm-readobj
           macho-dump opt
           profile_rt-shared
           FileCheck count not

Added: llvm/branches/R600/test/CodeGen/ARM/call-noret-minsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/call-noret-minsize.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/call-noret-minsize.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/call-noret-minsize.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8   | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift       | FileCheck %s -check-prefix=SWIFT
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2
+; rdar://12348580
+
+define void @t1() noreturn minsize nounwind ssp {
+entry:
+; ARM: t1:
+; ARM: bl _bar
+
+; SWIFT: t1:
+; SWIFT: bl _bar
+
+; T2: t1:
+; T2: blx _bar
+  tail call void @bar() noreturn nounwind
+  unreachable
+}
+
+define void @t2() noreturn minsize nounwind ssp {
+entry:
+; ARM: t2:
+; ARM: bl _t1
+
+; SWIFT: t2:
+; SWIFT: bl _t1
+
+; T2: t2:
+; T2: bl _t1
+  tail call void @t1() noreturn nounwind
+  unreachable
+}
+
+declare void @bar() noreturn

Modified: llvm/branches/R600/test/CodeGen/ARM/carry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/carry.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/carry.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/carry.ll Tue Nov 13 09:21:47 2012
@@ -45,3 +45,16 @@
   %0 = sub nsw i64 0, %x
   ret i64 %0
 }
+
+; rdar://12559385
+define i64 @f5(i32 %vi) {
+entry:
+; CHECK: f5:
+; CHECK: movw [[REG:r[0-9]+]], #36102
+; CHECK: sbc r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
+    %v0 = zext i32 %vi to i64
+    %v1 = xor i64 %v0, -155057456198619
+    %v4 = add i64 %v1, 155057456198619
+    %v5 = add i64 %v4, %v1
+    ret i64 %v5
+}

Modified: llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll Tue Nov 13 09:21:47 2012
@@ -289,3 +289,31 @@
   %tmp18 = insertvalue %struct.wombat.5 %tmp17, <4 x float> undef, 3, 0
   ret %struct.wombat.5 %tmp18
 }
+
+; CHECK: adjustCopiesBackFrom
+; The shuffle in if.else3 must be preserved even though adjustCopiesBackFrom
+; is tempted to remove it.
+; CHECK: %if.else3
+; CHECK: vorr d
+define internal void @adjustCopiesBackFrom(<2 x i64>* noalias nocapture sret %agg.result, <2 x i64> %in) {
+entry:
+  %0 = extractelement <2 x i64> %in, i32 0
+  %cmp = icmp slt i64 %0, 1
+  %.in = select i1 %cmp, <2 x i64> <i64 0, i64 undef>, <2 x i64> %in
+  %1 = extractelement <2 x i64> %in, i32 1
+  %cmp1 = icmp slt i64 %1, 1
+  br i1 %cmp1, label %if.then2, label %if.else3
+
+if.then2:                                         ; preds = %entry
+  %2 = insertelement <2 x i64> %.in, i64 0, i32 1
+  br label %if.end4
+
+if.else3:                                         ; preds = %entry
+  %3 = shufflevector <2 x i64> %.in, <2 x i64> %in, <2 x i32> <i32 0, i32 3>
+  br label %if.end4
+
+if.end4:                                          ; preds = %if.else3, %if.then2
+  %result.2 = phi <2 x i64> [ %2, %if.then2 ], [ %3, %if.else3 ]
+  store <2 x i64> %result.2, <2 x i64>* %agg.result, align 128
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/ARM/indirectbr-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/indirectbr-2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/indirectbr-2.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/indirectbr-2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,46 @@
+; RUN: llc < %s -O0 -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s
+; <rdar://problem/12529625>
+
+@foo = global i32 34879, align 4
+@DWJumpTable2808 = global [2 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@func, %14) to i32), i32 ptrtoint (i8* blockaddress(@func, %4) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@func, %13) to i32), i32 ptrtoint (i8* blockaddress(@func, %4) to i32))]
+@0 = internal constant [45 x i8] c"func XXXXXXXXXXX :: bb xxxxxxxxxxxxxxxxxxxx\0A\00"
+
+; The indirect branch has the two destinations as successors. The lone PHI
+; statement shouldn't be implicitly defined.
+
+; CHECK:      func:
+; CHECK:      Ltmp1:    @ Block address taken
+; CHECK-NOT:            @ implicit-def: R0
+; CHECK:                @ 4-byte Reload
+
+define i32 @func() nounwind ssp {
+  %1 = alloca i32, align 4
+  %2 = load i32* @foo, align 4
+  %3 = icmp eq i32 %2, 34879
+  br label %4
+
+; <label>:4                                       ; preds = %0
+  %5 = zext i1 %3 to i32
+  %6 = mul i32 %5, 287
+  %7 = add i32 %6, 2
+  %8 = getelementptr [2 x i32]* @DWJumpTable2808, i32 0, i32 %5
+  %9 = load i32* %8
+  %10 = add i32 %9, ptrtoint (i8* blockaddress(@func, %4) to i32)
+  %11 = inttoptr i32 %10 to i8*
+  %12 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8]* @0, i32 0, i32 0))
+  indirectbr i8* %11, [label %13, label %14]
+
+; <label>:13                                      ; preds = %4
+  %tmp14 = phi i32 [ %7, %4 ]
+  store i32 23958, i32* @foo, align 4
+  %tmp15 = load i32* %1, align 4
+  %tmp16 = icmp eq i32 %tmp15, 0
+  %tmp17 = zext i1 %tmp16 to i32
+  %tmp21 = add i32 %tmp17, %tmp14
+  ret i32 %tmp21
+
+; <label>:14                                      ; preds = %4
+  ret i32 42
+}
+
+declare i32 @printf(i8*, ...)

Modified: llvm/branches/R600/test/CodeGen/ARM/integer_insertelement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/integer_insertelement.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/integer_insertelement.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/integer_insertelement.ll Tue Nov 13 09:21:47 2012
@@ -6,7 +6,7 @@
 
 ; CHECK: @f
 ; CHECK-NOT: vorr d
-; CHECK: vmov s
+; CHECK: vmov.32 d
 ; CHECK-NOT: vorr d
 ; CHECK: mov pc, lr
 define <4 x i32> @f(<4 x i32> %in) {

Modified: llvm/branches/R600/test/CodeGen/ARM/vext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/vext.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/vext.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/vext.ll Tue Nov 13 09:21:47 2012
@@ -74,6 +74,39 @@
 	ret <16 x i8> %tmp3
 }
 
+define <16 x i8> @test_vextq_undef_op2(<16 x i8> %a) nounwind {
+;CHECK: test_vextq_undef_op2:
+;CHECK: vext
+entry:
+  %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
+  ret <16 x i8> %tmp1
+}
+
+define <8 x i8> @test_vextd_undef_op2(<8 x i8> %a) nounwind {
+;CHECK: test_vextd_undef_op2:
+;CHECK: vext
+entry:
+  %tmp1 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
+  ret <8 x i8> %tmp1
+}
+
+
+define <16 x i8> @test_vextq_undef_op2_undef(<16 x i8> %a) nounwind {
+;CHECK: test_vextq_undef_op2_undef:
+;CHECK: vext
+entry:
+  %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
+  ret <16 x i8> %tmp1
+}
+
+define <8 x i8> @test_vextd_undef_op2_undef(<8 x i8> %a) nounwind {
+;CHECK: test_vextd_undef_op2_undef:
+;CHECK: vext
+entry:
+  %tmp1 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1>
+  ret <8 x i8> %tmp1
+}
+
 ; Tests for ReconstructShuffle function. Indices have to be carefully
 ; chosen to reach lowering phase as a BUILD_VECTOR.
 

Modified: llvm/branches/R600/test/CodeGen/ARM/vget_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/vget_lane.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/vget_lane.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/vget_lane.ll Tue Nov 13 09:21:47 2012
@@ -200,7 +200,7 @@
 
 define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
 ;CHECK: vsetQ_lane32:
-;CHECK: vmov s
+;CHECK: vmov.32 d{{.*}}[1], r1
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
 	ret <4 x i32> %tmp2

Added: llvm/branches/R600/test/CodeGen/Mips/alloca16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/alloca16.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/alloca16.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/alloca16.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,75 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 25, align 4
+@jjjj = global i32 35, align 4
+@kkkk = global i32 100, align 4
+@t = global i32 25, align 4
+@riii = common global i32 0, align 4
+@rjjj = common global i32 0, align 4
+@rkkk = common global i32 0, align 4
+
+define void @temp(i32 %foo) nounwind {
+entry:
+  %foo.addr = alloca i32, align 4
+  store i32 %foo, i32* %foo.addr, align 4
+  %0 = load i32* %foo.addr, align 4
+  store i32 %0, i32* @t, align 4
+  ret void
+}
+
+define void @test() nounwind {
+entry:
+; 16: 	.frame	$16,24,$ra
+; 16: 	save 	$ra, $s0, $s1, 24
+; 16: 	move	$16, $sp
+; 16:	move	${{[0-9]+}}, $sp
+; 16:	subu	$[[REGISTER:[0-9]+]], ${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	$sp, $[[REGISTER]]
+  %sssi = alloca i32, align 4
+  %ip = alloca i32*, align 4
+  %sssj = alloca i32, align 4
+  %0 = load i32* @iiii, align 4
+  store i32 %0, i32* %sssi, align 4
+  %1 = load i32* @kkkk, align 4
+  %mul = mul nsw i32 %1, 100
+  %2 = alloca i8, i32 %mul
+  %3 = bitcast i8* %2 to i32*
+  store i32* %3, i32** %ip, align 4
+  %4 = load i32* @jjjj, align 4
+  store i32 %4, i32* %sssj, align 4
+  %5 = load i32* @jjjj, align 4
+  %6 = load i32* @iiii, align 4
+  %7 = load i32** %ip, align 4
+  %arrayidx = getelementptr inbounds i32* %7, i32 %6
+  store i32 %5, i32* %arrayidx, align 4
+  %8 = load i32* @kkkk, align 4
+  %9 = load i32* @jjjj, align 4
+  %10 = load i32** %ip, align 4
+  %arrayidx1 = getelementptr inbounds i32* %10, i32 %9
+  store i32 %8, i32* %arrayidx1, align 4
+  %11 = load i32* @iiii, align 4
+  %12 = load i32* @kkkk, align 4
+  %13 = load i32** %ip, align 4
+  %arrayidx2 = getelementptr inbounds i32* %13, i32 %12
+  store i32 %11, i32* %arrayidx2, align 4
+  %14 = load i32** %ip, align 4
+  %arrayidx3 = getelementptr inbounds i32* %14, i32 25
+  %15 = load i32* %arrayidx3, align 4
+  store i32 %15, i32* @riii, align 4
+  %16 = load i32** %ip, align 4
+  %arrayidx4 = getelementptr inbounds i32* %16, i32 35
+  %17 = load i32* %arrayidx4, align 4
+  store i32 %17, i32* @rjjj, align 4
+  %18 = load i32** %ip, align 4
+  %arrayidx5 = getelementptr inbounds i32* %18, i32 100
+  %19 = load i32* %arrayidx5, align 4
+  store i32 %19, i32* @rkkk, align 4
+  %20 = load i32* @t, align 4
+  %21 = load i32** %ip, align 4
+  %arrayidx6 = getelementptr inbounds i32* %21, i32 %20
+  %22 = load i32* %arrayidx6, align 4
+; 16: 	save	16
+  call void @temp(i32 %22)
+; 16: 	restore	16
+  ret void
+}

Modified: llvm/branches/R600/test/CodeGen/Mips/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/atomic.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/atomic.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/atomic.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mipsel --disable-machine-licm < %s | FileCheck %s
 
 @x = common global i32 0, align 4
 
@@ -181,8 +181,9 @@
 
 ; CHECK:   $[[BB0:[A-Z_0-9]+]]:
 ; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK:   and     $[[R18:[0-9]+]], $[[R9]], $[[R6]]
 ; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R9]]
+; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
 ; CHECK:   sc      $[[R14]], 0($[[R2]])
 ; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
 

Added: llvm/branches/R600/test/CodeGen/Mips/atomicops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/atomicops.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/atomicops.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/atomicops.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,40 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@.str = private unnamed_addr constant [8 x i8] c"%d, %d\0A\00", align 1
+
+define i32 @foo(i32* %mem, i32 %val, i32 %c) nounwind {
+entry:
+  %0 = atomicrmw add i32* %mem, i32 %val seq_cst
+  %add = add nsw i32 %0, %c
+  ret i32 %add
+; 16: foo:
+; 16:	lw	${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
+; 16: 	lw	${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
+}
+
+define i32 @main() nounwind {
+entry:
+  %x = alloca i32, align 4
+  store volatile i32 0, i32* %x, align 4
+  %0 = atomicrmw add i32* %x, i32 1 seq_cst
+  %add.i = add nsw i32 %0, 2
+  %1 = load volatile i32* %x, align 4
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
+  %2 = cmpxchg i32* %x, i32 1, i32 2 seq_cst
+  %3 = load volatile i32* %x, align 4
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
+  %4 = atomicrmw xchg i32* %x, i32 1 seq_cst
+  %5 = load volatile i32* %x, align 4
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind
+; 16: main:
+; 16:	lw	${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
+; 16: 	lw	${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
+; 16:	lw	${{[0-9]+}}, %call16(__sync_val_compare_and_swap_4)(${{[0-9]+}})
+; 16:	lw	${{[0-9]+}}, %call16(__sync_lock_test_and_set_4)(${{[0-9]+}})
+
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+

Modified: llvm/branches/R600/test/CodeGen/Mips/brdelayslot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/brdelayslot.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/brdelayslot.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/brdelayslot.ll Tue Nov 13 09:21:47 2012
@@ -19,3 +19,19 @@
 }
 
 declare void @foo2(i32)
+
+; Check that cvt.d.w goes into jalr's delay slot.
+;
+define void @foo3(i32 %a) nounwind {
+entry:
+; Default:     foo3:
+; Default:     jalr
+; Default:     cvt.d.w
+
+  %conv = sitofp i32 %a to double
+  tail call void @foo4(double %conv) nounwind
+  ret void
+}
+
+declare void @foo4(double)
+

Added: llvm/branches/R600/test/CodeGen/Mips/brind.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/brind.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/brind.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/brind.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,40 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@main.L = internal unnamed_addr constant [5 x i8*] [i8* blockaddress(@main, %L1), i8* blockaddress(@main, %L2), i8* blockaddress(@main, %L3), i8* blockaddress(@main, %L4), i8* null], align 4
+@str = private unnamed_addr constant [2 x i8] c"A\00"
+@str5 = private unnamed_addr constant [2 x i8] c"B\00"
+@str6 = private unnamed_addr constant [2 x i8] c"C\00"
+@str7 = private unnamed_addr constant [2 x i8] c"D\00"
+@str8 = private unnamed_addr constant [2 x i8] c"E\00"
+
+define i32 @main() nounwind {
+entry:
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str, i32 0, i32 0))
+  br label %L1
+
+L1:                                               ; preds = %entry, %L3
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %L3 ]
+  %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str5, i32 0, i32 0))
+  br label %L2
+
+L2:                                               ; preds = %L1, %L3
+  %i.1 = phi i32 [ %i.0, %L1 ], [ %inc, %L3 ]
+  %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str6, i32 0, i32 0))
+  br label %L3
+
+L3:                                               ; preds = %L2, %L3
+  %i.2 = phi i32 [ %i.1, %L2 ], [ %inc, %L3 ]
+  %puts7 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str7, i32 0, i32 0))
+  %inc = add i32 %i.2, 1
+  %arrayidx = getelementptr inbounds [5 x i8*]* @main.L, i32 0, i32 %i.2
+  %0 = load i8** %arrayidx, align 4
+  indirectbr i8* %0, [label %L1, label %L2, label %L3, label %L4]
+; 16: 	jrc	 ${{[0-9]+}}
+L4:                                               ; preds = %L3
+  %puts8 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str8, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+

Added: llvm/branches/R600/test/CodeGen/Mips/check-noat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/check-noat.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/check-noat.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/check-noat.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,11 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s 
+
+define void @f() nounwind readnone {
+entry:
+; CHECK: f:
+; CHECK: .set  noat
+; CHECK: .set  at
+
+  ret void
+}
+

Added: llvm/branches/R600/test/CodeGen/Mips/eh-dwarf-cfa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/eh-dwarf-cfa.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/eh-dwarf-cfa.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/eh-dwarf-cfa.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,63 @@
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | \
+; RUN:      FileCheck %s -check-prefix=CHECK-MIPS64
+
+declare i8* @llvm.eh.dwarf.cfa(i32) nounwind
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+define i8* @f1() nounwind {
+entry:
+  %x = alloca [32 x i8], align 1
+  %0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
+  ret i8* %0
+
+; CHECK:        addiu   $sp, $sp, -32
+; CHECK:        addiu   $2, $sp, 32
+}
+
+
+define i8* @f2() nounwind {
+entry:
+  %x = alloca [65536 x i8], align 1
+  %0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
+  ret i8* %0
+
+; check stack size (65536 + 8)
+; CHECK:        lui     $[[R0:[a-z0-9]+]], 65535
+; CHECK:        addiu   $[[R0]], $[[R0]], -8
+; CHECK:        addu    $sp, $sp, $[[R0]]
+
+; check return value ($sp + stack size)
+; CHECK:        lui     $[[R1:[a-z0-9]+]], 1
+; CHECK:        addu    $[[R1]], $sp, $[[R1]]
+; CHECK:        addiu   $2, $[[R1]], 8
+}
+
+
+define i32 @f3() nounwind {
+entry:
+  %x = alloca [32 x i8], align 1
+  %0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
+  %1 = ptrtoint i8* %0 to i32
+  %2 = call i8* @llvm.frameaddress(i32 0)
+  %3 = ptrtoint i8* %2 to i32
+  %add = add i32 %1, %3
+  ret i32 %add
+
+; CHECK:        addiu   $sp, $sp, -40
+
+; check return value ($fp + stack size + $fp)
+; CHECK:        addiu   $[[R0:[a-z0-9]+]], $fp, 40
+; CHECK:        addu    $2, $[[R0]], $fp
+}
+
+
+define i8* @f4() nounwind {
+entry:
+  %x = alloca [32 x i8], align 1
+  %0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
+  ret i8* %0
+
+; CHECK-MIPS64:        daddiu   $sp, $sp, -32
+; CHECK-MIPS64:        daddiu   $2, $sp, 32
+}

Modified: llvm/branches/R600/test/CodeGen/Mips/helloworld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/helloworld.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/helloworld.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/helloworld.ll Tue Nov 13 09:21:47 2012
@@ -24,10 +24,10 @@
 ; C1:	addiu	${{[0-9]+}}, %lo($.str)
 ; C2:	move	$25, ${{[0-9]+}}
 ; C1:	move 	$gp, ${{[0-9]+}}
-; C1:	jalr 	${{[0-9]+}}
+; C1:	jalrc 	${{[0-9]+}}
 ; SR:	restore 	$ra, [[FS]]
 ; PE:	li	$2, 0
-; PE:	jr 	$ra
+; PE:	jrc 	$ra
 
 }
 

Added: llvm/branches/R600/test/CodeGen/Mips/i32k.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/i32k.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/i32k.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/i32k.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,17 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16a
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16b
+
+@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind
+; 16a:	li	${{[0-9]+}}, 29905
+; 16b:	li	${{[0-9]+}}, 16408
+  %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind
+; 16a:	li	${{[0-9]+}}, 49127
+; 16b:	li	${{[0-9]+}}, 35631
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind

Modified: llvm/branches/R600/test/CodeGen/Mips/largeimm1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/largeimm1.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/largeimm1.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/largeimm1.ll Tue Nov 13 09:21:47 2012
@@ -1,7 +1,7 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s
 
-; CHECK: lui $at, 49152
-; CHECK: lui $at, 16384
+; CHECK: lui ${{[0-9]+}}, 49152
+; CHECK: lui ${{[0-9]+}}, 16384
 define void @f() nounwind {
 entry:
   %a1 = alloca [1073741824 x i8], align 1

Modified: llvm/branches/R600/test/CodeGen/Mips/largeimmprinting.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/largeimmprinting.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/largeimmprinting.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/largeimmprinting.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,6 @@
-; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | \
+; RUN: FileCheck %s -check-prefix=64
 
 %struct.S1 = type { [65536 x i8] }
 
@@ -6,9 +8,21 @@
 
 define void @f() nounwind {
 entry:
-; CHECK:  lui $at, 65535
-; CHECK:  addiu $at, $at, -16
-; CHECK:  addu  $sp, $sp, $at
+; 32:  lui $[[R0:[0-9]+]], 65535
+; 32:  addiu $[[R0]], $[[R0]], -24
+; 32:  addu $sp, $sp, $[[R0]]
+; 32:  lui $[[R1:[0-9]+]], 1
+; 32:  addu $[[R1]], $sp, $[[R1]]
+; 32:  sw $ra, 20($[[R1]])
+; 64:  daddiu  $[[R0:[0-9]+]], $zero, 1
+; 64:  dsll  $[[R0]], $[[R0]], 48
+; 64:  daddiu  $[[R0]], $[[R0]], -1
+; 64:  dsll  $[[R0]], $[[R0]], 16
+; 64:  daddiu  $[[R0]], $[[R0]], -48
+; 64:  daddu $sp, $sp, $[[R0]]
+; 64:  lui $[[R1:[0-9]+]], 1
+; 64:  daddu $[[R1]], $sp, $[[R1]]
+; 64:  sd  $ra, 40($[[R1]])
 
   %agg.tmp = alloca %struct.S1, align 1
   %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0

Added: llvm/branches/R600/test/CodeGen/Mips/llcarry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/llcarry.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/llcarry.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/llcarry.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,51 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i64 4294967295, align 8
+@j = global i64 15, align 8
+@ii = global i64 4294967295, align 8
+@k = common global i64 0, align 8
+@l = common global i64 0, align 8
+@m = common global i64 0, align 8
+
+define void @test1() nounwind {
+entry:
+  %0 = load i64* @i, align 8
+  %1 = load i64* @j, align 8
+  %add = add nsw i64 %1, %0
+  store i64 %add, i64* @k, align 8
+; 16:	addu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	${{[0-9]+}}, $t8
+; 16:	addu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 16:	addu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  ret void
+}
+
+define void @test2() nounwind {
+entry:
+  %0 = load i64* @i, align 8
+  %1 = load i64* @j, align 8
+  %sub = sub nsw i64 %0, %1
+; 16:	subu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	${{[0-9]+}}, $t8
+; 16:	addu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 16:	subu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  store i64 %sub, i64* @l, align 8
+  ret void
+}
+
+define void @test3() nounwind {
+entry:
+  %0 = load i64* @ii, align 8
+  %add = add nsw i64 %0, 15
+; 16:	addiu	${{[0-9]+}}, 15
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	${{[0-9]+}}, $t8
+; 16:	addu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 16:	addu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+  store i64 %add, i64* @m, align 8
+  ret void
+}
+
+

Modified: llvm/branches/R600/test/CodeGen/Mips/longbranch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/longbranch.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/longbranch.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/longbranch.ll Tue Nov 13 09:21:47 2012
@@ -6,15 +6,15 @@
 define void @foo1(i32 %s) nounwind {
 entry:
 ; O32: bal
-; O32: lui $at, 0
-; O32: addiu $at, $at, {{[0-9]+}} 
-; N64: lui $at, 0
-; N64: daddiu $at, $at, 0
-; N64: dsll $at, $at, 16
-; N64: daddiu $at, $at, 0
+; O32: lui $1, 0
+; O32: addiu $1, $1, {{[0-9]+}} 
+; N64: lui $1, 0
+; N64: daddiu $1, $1, 0
+; N64: dsll $1, $1, 16
+; N64: daddiu $1, $1, 0
 ; N64: bal
-; N64: dsll $at, $at, 16
-; N64: daddiu $at, $at, {{[0-9]+}}  
+; N64: dsll $1, $1, 16
+; N64: daddiu $1, $1, {{[0-9]+}}  
 
   %tobool = icmp eq i32 %s, 0
   br i1 %tobool, label %if.end, label %if.then

Modified: llvm/branches/R600/test/CodeGen/Mips/mips64-sret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/mips64-sret.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/mips64-sret.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/mips64-sret.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O0 < %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O3 < %s | FileCheck %s
 
 %struct.S = type { [8 x i32] }
 
@@ -6,6 +6,8 @@
 
 define void @f(%struct.S* noalias sret %agg.result) nounwind {
 entry:
+; CHECK: daddu $2, $zero, $4
+
   %0 = bitcast %struct.S* %agg.result to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false)
   ret void

Added: llvm/branches/R600/test/CodeGen/Mips/misha.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/misha.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/misha.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/misha.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,69 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+define i32 @sumc(i8* nocapture %to, i8* nocapture %from, i32) nounwind {
+entry:
+  %sext = shl i32 %0, 16
+  %conv = ashr exact i32 %sext, 16
+  %cmp8 = icmp eq i32 %conv, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %.pre = load i8* %to, align 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %1 = phi i8 [ %.pre, %for.body.lr.ph ], [ %conv4, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %from.addr.09 = phi i8* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i8* %from.addr.09, i32 1
+  %2 = load i8* %from.addr.09, align 1
+  %conv27 = zext i8 %2 to i32
+  %conv36 = zext i8 %1 to i32
+  %add = add nsw i32 %conv36, %conv27
+  %conv4 = trunc i32 %add to i8
+  store i8 %conv4, i8* %to, align 1
+  %inc = add nsw i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %conv
+  br i1 %cmp, label %for.end, label %for.body
+; 16: sumc:
+; 16: 	lbu	${{[0-9]+}}, 0(${{[0-9]+}})
+; 16: 	lbu	${{[0-9]+}}, 0(${{[0-9]+}})
+; 16: sum:
+; 16: 	lhu	${{[0-9]+}}, 0(${{[0-9]+}})
+; 16: 	lhu	${{[0-9]+}}, 0(${{[0-9]+}})
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i32 undef
+}
+
+define i32 @sum(i16* nocapture %to, i16* nocapture %from, i32) nounwind {
+entry:
+  %sext = shl i32 %0, 16
+  %conv = ashr exact i32 %sext, 16
+  %cmp8 = icmp eq i32 %conv, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %.pre = load i16* %to, align 2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %1 = phi i16 [ %.pre, %for.body.lr.ph ], [ %conv4, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %from.addr.09 = phi i16* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i16* %from.addr.09, i32 1
+  %2 = load i16* %from.addr.09, align 2
+  %conv27 = zext i16 %2 to i32
+  %conv36 = zext i16 %1 to i32
+  %add = add nsw i32 %conv36, %conv27
+  %conv4 = trunc i32 %add to i16
+  store i16 %conv4, i16* %to, align 2
+  %inc = add nsw i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %conv
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i32 undef
+}
+
+

Modified: llvm/branches/R600/test/CodeGen/Mips/null.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/null.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/null.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/null.ll Tue Nov 13 09:21:47 2012
@@ -8,6 +8,6 @@
 ; 16: 	.set	mips16                  # @main
 
 
-; 16:	jr	$ra
+; 16:	jrc	$ra
 
 }

Modified: llvm/branches/R600/test/CodeGen/Mips/o32_cc_byval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/o32_cc_byval.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/o32_cc_byval.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/o32_cc_byval.ll Tue Nov 13 09:21:47 2012
@@ -119,6 +119,16 @@
   ret void
 }
 
+%struct.S4 = type { [4 x i32] }
+
+define void @f5(i64 %a0, %struct.S4* nocapture byval %a1) nounwind {
+entry:
+  tail call void @f6(%struct.S4* byval %a1, i64 %a0) nounwind
+  ret void
+}
+
+declare void @f6(%struct.S4* nocapture byval, i64)
+
 !0 = metadata !{metadata !"int", metadata !1}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}

Added: llvm/branches/R600/test/CodeGen/Mips/remat-immed-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/remat-immed-load.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/remat-immed-load.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/remat-immed-load.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,51 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=64
+
+define void @f0() nounwind {
+entry:
+; 32:  addiu $4, $zero, 1
+; 32:  addiu $4, $zero, 1
+
+  tail call void @foo1(i32 1) nounwind
+  tail call void @foo1(i32 1) nounwind
+  ret void
+}
+
+declare void @foo1(i32)
+
+define void @f3() nounwind {
+entry:
+; 64:  daddiu $4, $zero, 1
+; 64:  daddiu $4, $zero, 1
+
+  tail call void @foo2(i64 1) nounwind
+  tail call void @foo2(i64 1) nounwind
+  ret void
+}
+
+declare void @foo2(i64)
+
+define void @f5() nounwind {
+entry:
+; 32:  lui $4, 1
+; 32:  lui $4, 1
+
+  tail call void @f6(i32 65536) nounwind
+  tail call void @f6(i32 65536) nounwind
+  ret void
+}
+
+declare void @f6(i32)
+
+define void @f7() nounwind {
+entry:
+; 64:  lui $4, 1
+; 64:  lui $4, 1
+
+  tail call void @f8(i64 65536) nounwind
+  tail call void @f8(i64 65536) nounwind
+  ret void
+}
+
+declare void @f8(i64)
+

Added: llvm/branches/R600/test/CodeGen/Mips/selpat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/selpat.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/selpat.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/selpat.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,350 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+
+define void @calc_seleq() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp eq i32 %0, %1
+  %2 = load i32* @f, align 4
+  %3 = load i32* @t, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  store i32 %cond, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp eq i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %3, i32 %2
+  store i32 %cond10, i32* @z3, align 4
+  store i32 %cond10, i32* @z4, align 4
+  ret void
+}
+
+
+define void @calc_seleqk() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp eq i32 %0, 1
+  %1 = load i32* @t, align 4
+  %2 = load i32* @f, align 4
+  %cond = select i1 %cmp, i32 %1, i32 %2
+  store i32 %cond, i32* @z1, align 4
+; 16:	cmpi	${{[0-9]+}}, 1
+; 16:	bteqz	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp1 = icmp eq i32 %0, 10
+  %cond5 = select i1 %cmp1, i32 %2, i32 %1
+  store i32 %cond5, i32* @z2, align 4
+  %3 = load i32* @b, align 4
+  %cmp6 = icmp eq i32 %3, 3
+  %cond10 = select i1 %cmp6, i32 %2, i32 %1
+  store i32 %cond10, i32* @z3, align 4
+; 16:	cmpi	${{[0-9]+}}, 10
+; 16:	bteqz	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp11 = icmp eq i32 %3, 10
+  %cond15 = select i1 %cmp11, i32 %1, i32 %2
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_seleqz() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp eq i32 %0, 0
+  %1 = load i32* @t, align 4
+  %2 = load i32* @f, align 4
+  %cond = select i1 %cmp, i32 %1, i32 %2
+  store i32 %cond, i32* @z1, align 4
+; 16:	beqz	${{[0-9]+}}, .+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %3 = load i32* @b, align 4
+  %cmp1 = icmp eq i32 %3, 0
+  %cond5 = select i1 %cmp1, i32 %2, i32 %1
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp eq i32 %4, 0
+  %cond10 = select i1 %cmp6, i32 %1, i32 %2
+  store i32 %cond10, i32* @z3, align 4
+  store i32 %cond, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_selge() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp sge i32 %0, %1
+  %2 = load i32* @f, align 4
+  %3 = load i32* @t, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp1 = icmp sge i32 %1, %0
+  %cond5 = select i1 %cmp1, i32 %3, i32 %2
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp sge i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %3, i32 %2
+  store i32 %cond10, i32* @z3, align 4
+  %cmp11 = icmp sge i32 %0, %4
+  %cond15 = select i1 %cmp11, i32 %3, i32 %2
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+define i32 @calc_selgt() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp sgt i32 %0, %1
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %2 = load i32* @f, align 4
+  %3 = load i32* @t, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+  %cmp1 = icmp sgt i32 %1, %0
+  %cond5 = select i1 %cmp1, i32 %3, i32 %2
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp sgt i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %2, i32 %3
+  store i32 %cond10, i32* @z3, align 4
+  %cmp11 = icmp sgt i32 %0, %4
+  %cond15 = select i1 %cmp11, i32 %2, i32 %3
+  store i32 %cond15, i32* @z4, align 4
+  ret i32 undef
+}
+
+define void @calc_selle() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp sle i32 %0, %1
+  %2 = load i32* @t, align 4
+  %3 = load i32* @f, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp1 = icmp sle i32 %1, %0
+  %cond5 = select i1 %cmp1, i32 %3, i32 %2
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp sle i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %2, i32 %3
+  store i32 %cond10, i32* @z3, align 4
+  %cmp11 = icmp sle i32 %0, %4
+  %cond15 = select i1 %cmp11, i32 %2, i32 %3
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_selltk() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, 10
+  %1 = load i32* @t, align 4
+  %2 = load i32* @f, align 4
+  %cond = select i1 %cmp, i32 %1, i32 %2
+  store i32 %cond, i32* @z1, align 4
+; 16:	slti	${{[0-9]+}}, {{[0-9]+}}
+; 16:	btnez	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %3 = load i32* @b, align 4
+  %cmp1 = icmp slt i32 %3, 2
+  %cond5 = select i1 %cmp1, i32 %2, i32 %1
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp sgt i32 %4, 2
+  %cond10 = select i1 %cmp6, i32 %2, i32 %1
+  store i32 %cond10, i32* @z3, align 4
+  %cmp11 = icmp sgt i32 %0, 2
+  %cond15 = select i1 %cmp11, i32 %2, i32 %1
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+
+define void @calc_selne() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp ne i32 %0, %1
+  %2 = load i32* @t, align 4
+  %3 = load i32* @f, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  store i32 %cond, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp ne i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %3, i32 %2
+  store i32 %cond10, i32* @z3, align 4
+  store i32 %cond10, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_selnek() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp ne i32 %0, 1
+  %1 = load i32* @f, align 4
+  %2 = load i32* @t, align 4
+  %cond = select i1 %cmp, i32 %1, i32 %2
+  store i32 %cond, i32* @z1, align 4
+; 16:	cmpi	${{[0-9]+}}, 1
+; 16:	btnez	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp1 = icmp ne i32 %0, 10
+  %cond5 = select i1 %cmp1, i32 %2, i32 %1
+  store i32 %cond5, i32* @z2, align 4
+  %3 = load i32* @b, align 4
+  %cmp6 = icmp ne i32 %3, 3
+  %cond10 = select i1 %cmp6, i32 %2, i32 %1
+  store i32 %cond10, i32* @z3, align 4
+; 16:	cmpi	${{[0-9]+}}, 10
+; 16:	btnez	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp11 = icmp ne i32 %3, 10
+  %cond15 = select i1 %cmp11, i32 %1, i32 %2
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_selnez() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp ne i32 %0, 0
+  %1 = load i32* @f, align 4
+  %2 = load i32* @t, align 4
+  %cond = select i1 %cmp, i32 %1, i32 %2
+  store i32 %cond, i32* @z1, align 4
+; 16:	bnez	${{[0-9]+}}, .+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %3 = load i32* @b, align 4
+  %cmp1 = icmp ne i32 %3, 0
+  %cond5 = select i1 %cmp1, i32 %2, i32 %1
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp ne i32 %4, 0
+  %cond10 = select i1 %cmp6, i32 %1, i32 %2
+  store i32 %cond10, i32* @z3, align 4
+  store i32 %cond, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_selnez2() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %tobool = icmp ne i32 %0, 0
+  %1 = load i32* @f, align 4
+  %2 = load i32* @t, align 4
+  %cond = select i1 %tobool, i32 %1, i32 %2
+  store i32 %cond, i32* @z1, align 4
+; 16:	bnez	${{[0-9]+}}, .+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %3 = load i32* @b, align 4
+  %tobool1 = icmp ne i32 %3, 0
+  %cond5 = select i1 %tobool1, i32 %2, i32 %1
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %tobool6 = icmp ne i32 %4, 0
+  %cond10 = select i1 %tobool6, i32 %1, i32 %2
+  store i32 %cond10, i32* @z3, align 4
+  store i32 %cond, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_seluge() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp uge i32 %0, %1
+  %2 = load i32* @f, align 4
+  %3 = load i32* @t, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp1 = icmp uge i32 %1, %0
+  %cond5 = select i1 %cmp1, i32 %3, i32 %2
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp uge i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %3, i32 %2
+  store i32 %cond10, i32* @z3, align 4
+  %cmp11 = icmp uge i32 %0, %4
+  %cond15 = select i1 %cmp11, i32 %3, i32 %2
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_selugt() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp ugt i32 %0, %1
+  %2 = load i32* @f, align 4
+  %3 = load i32* @t, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp1 = icmp ugt i32 %1, %0
+  %cond5 = select i1 %cmp1, i32 %3, i32 %2
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp ugt i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %2, i32 %3
+  store i32 %cond10, i32* @z3, align 4
+  %cmp11 = icmp ugt i32 %0, %4
+  %cond15 = select i1 %cmp11, i32 %2, i32 %3
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+define void @calc_selule() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp ule i32 %0, %1
+  %2 = load i32* @t, align 4
+  %3 = load i32* @f, align 4
+  %cond = select i1 %cmp, i32 %2, i32 %3
+  store i32 %cond, i32* @z1, align 4
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	.+4
+; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
+  %cmp1 = icmp ule i32 %1, %0
+  %cond5 = select i1 %cmp1, i32 %3, i32 %2
+  store i32 %cond5, i32* @z2, align 4
+  %4 = load i32* @c, align 4
+  %cmp6 = icmp ule i32 %4, %0
+  %cond10 = select i1 %cmp6, i32 %2, i32 %3
+  store i32 %cond10, i32* @z3, align 4
+  %cmp11 = icmp ule i32 %0, %4
+  %cond15 = select i1 %cmp11, i32 %2, i32 %3
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/seteq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/seteq.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/seteq.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/seteq.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,21 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 1, align 4
+@j = global i32 10, align 4
+@k = global i32 1, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %1 = load i32* @k, align 4
+  %cmp = icmp eq i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	xor	$[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}}
+; 16:	sltiu	$[[REGISTER:[0-9A-Ba-b_]+]], 1
+; 16:	move	${{[0-9]+}}, $t8
+  ret void
+}
+

Added: llvm/branches/R600/test/CodeGen/Mips/seteqz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/seteqz.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/seteqz.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/seteqz.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,24 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 0, align 4
+@j = global i32 99, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %cmp = icmp eq i32 %0, 0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	sltiu	${{[0-9]+}}, 1
+; 16:	move	${{[0-9]+}}, $t8
+  %1 = load i32* @j, align 4
+  %cmp1 = icmp eq i32 %1, 99
+  %conv2 = zext i1 %cmp1 to i32
+  store i32 %conv2, i32* @r2, align 4
+; 16:	xor	$[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}}
+; 16:	sltiu	$[[REGISTER:[0-9A-Ba-b_]+]], 1
+; 16:	move	${{[0-9]+}}, $t8
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setge.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setge.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setge.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,27 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 -5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+@.str = private unnamed_addr constant [22 x i8] c"1 = %i\0A1 = %i\0A0 = %i\0A\00", align 1
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @k, align 4
+  %1 = load i32* @j, align 4
+  %cmp = icmp sge i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	$[[REGISTER:[0-9]+]], $t8
+; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
+  %2 = load i32* @m, align 4
+  %cmp1 = icmp sge i32 %0, %2
+  %conv2 = zext i1 %cmp1 to i32
+  store i32 %conv2, i32* @r2, align 4
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setgek.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setgek.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setgek.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setgek.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,18 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@k = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @k, align 4
+  %cmp = icmp sgt i32 %0, -32769
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	slti	${{[0-9]+}}, -32768
+; 16:	move	${{[0-9]+}}, $t8
+; 16:	xor	${{[0-9]+}}, ${{[0-9]+}}
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setle.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setle.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setle.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,26 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 -5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i32* @k, align 4
+  %cmp = icmp sle i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	$[[REGISTER:[0-9]+]], $t8
+; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
+  %2 = load i32* @m, align 4
+  %cmp1 = icmp sle i32 %2, %1
+  %conv2 = zext i1 %cmp1 to i32
+  store i32 %conv2, i32* @r2, align 4
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setlt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setlt.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setlt.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setlt.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,21 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 -5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i32* @k, align 4
+  %cmp = icmp slt i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	${{[0-9]+}}, $t8
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setltk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setltk.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setltk.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setltk.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,20 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 -5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %cmp = icmp slt i32 %0, 10
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	slti	$[[REGISTER:[0-9]+]], 10
+; 16:	move	$[[REGISTER]], $t8
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setne.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setne.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setne.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setne.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,20 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 1, align 4
+@j = global i32 10, align 4
+@k = global i32 1, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %1 = load i32* @k, align 4
+  %cmp = icmp ne i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	xor	$[[REGISTER:[0-9]+]], ${{[0-9]+}}
+; 16:	sltu	${{[0-9]+}}, $[[REGISTER]]
+; 16:	move	${{[0-9]+}}, $t8
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setuge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setuge.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setuge.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setuge.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,26 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @k, align 4
+  %1 = load i32* @j, align 4
+  %cmp = icmp uge i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move    $[[REGISTER:[0-9]+]], $t8
+; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
+  %2 = load i32* @m, align 4
+  %cmp1 = icmp uge i32 %0, %2
+  %conv2 = zext i1 %cmp1 to i32
+  store i32 %conv2, i32* @r2, align 4
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setugt.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setugt.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setugt.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,21 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @k, align 4
+  %1 = load i32* @j, align 4
+  %cmp = icmp ugt i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move    ${{[0-9]+}}, $t8
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setule.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setule.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setule.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,26 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i32* @k, align 4
+  %cmp = icmp ule i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	$[[REGISTER:[0-9]+]], $t8
+; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
+  %2 = load i32* @m, align 4
+  %cmp1 = icmp ule i32 %2, %1
+  %conv2 = zext i1 %cmp1 to i32
+  store i32 %conv2, i32* @r2, align 4
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setult.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setult.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setult.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setult.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,21 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %1 = load i32* @k, align 4
+  %cmp = icmp ult i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	move	${{[0-9]+}}, $t8
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/setultk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/setultk.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/setultk.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/setultk.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,20 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 5, align 4
+@k = global i32 10, align 4
+@l = global i32 20, align 4
+@m = global i32 10, align 4
+@r1 = common global i32 0, align 4
+@r2 = common global i32 0, align 4
+@r3 = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @j, align 4
+  %cmp = icmp ult i32 %0, 10
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @r1, align 4
+; 16:	sltiu	$[[REGISTER:[0-9]+]], 10
+; 16:	move	$[[REGISTER]], $t8
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/Mips/stchar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/stchar.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/stchar.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/stchar.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,90 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_h
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_b
+
+@.str = private unnamed_addr constant [9 x i8] c"%hd %c \0A\00", align 1
+@sp = common global i16* null, align 4
+@cp = common global i8* null, align 4
+
+define void @p1(i16 signext %s, i8 signext %c) nounwind {
+entry:
+  %conv = sext i16 %s to i32
+  %conv1 = sext i8 %c to i32
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv, i32 %conv1) nounwind
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define void @p2() nounwind {
+entry:
+  %0 = load i16** @sp, align 4
+  %1 = load i16* %0, align 2
+  %2 = load i8** @cp, align 4
+  %3 = load i8* %2, align 1
+  %conv.i = sext i16 %1 to i32
+  %conv1.i = sext i8 %3 to i32
+  %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
+  %4 = load i16** @sp, align 4
+  store i16 32, i16* %4, align 2
+  %5 = load i8** @cp, align 4
+  store i8 97, i8* %5, align 1
+  ret void
+}
+
+define void @test() nounwind {
+entry:
+  %s = alloca i16, align 4
+  %c = alloca i8, align 4
+  store i16 16, i16* %s, align 4
+  store i8 99, i8* %c, align 4
+  store i16* %s, i16** @sp, align 4
+  store i8* %c, i8** @cp, align 4
+  %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
+  %0 = load i16** @sp, align 4
+  store i16 32, i16* %0, align 2
+  %1 = load i8** @cp, align 4
+  store i8 97, i8* %1, align 1
+  %2 = load i16* %s, align 4
+  %3 = load i8* %c, align 4
+  %conv.i = sext i16 %2 to i32
+  %conv1.i = sext i8 %3 to i32
+  %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
+  ret void
+; 16_b: test:
+; 16_h: test:
+; 16_b:	sb	${{[0-9]+}}, [[offset1:[0-9]+]](${{[0-9]+}})
+; 16_b: lb      ${{[0-9]+}}, [[offset1]](${{[0-9]+}})
+; 16_h:	sh	${{[0-9]+}}, [[offset2:[0-9]+]](${{[0-9]+}})
+; 16_h: lh      ${{[0-9]+}}, [[offset2]](${{[0-9]+}})
+}
+
+define i32 @main() nounwind {
+entry:
+  %s.i = alloca i16, align 4
+  %c.i = alloca i8, align 4
+  %0 = bitcast i16* %s.i to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind
+  call void @llvm.lifetime.start(i64 -1, i8* %c.i) nounwind
+  store i16 16, i16* %s.i, align 4
+  store i8 99, i8* %c.i, align 4
+  store i16* %s.i, i16** @sp, align 4
+  store i8* %c.i, i8** @cp, align 4
+  %call.i.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
+  %1 = load i16** @sp, align 4
+  store i16 32, i16* %1, align 2
+  %2 = load i8** @cp, align 4
+  store i8 97, i8* %2, align 1
+  %3 = load i16* %s.i, align 4
+  %4 = load i8* %c.i, align 4
+  %conv.i.i = sext i16 %3 to i32
+  %conv1.i.i = sext i8 %4 to i32
+  %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i.i, i32 %conv1.i.i) nounwind
+  call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
+  call void @llvm.lifetime.end(i64 -1, i8* %c.i) nounwind
+  ret i32 0
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+

Modified: llvm/branches/R600/test/CodeGen/Mips/tailcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/tailcall.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/tailcall.ll (original)
+++ llvm/branches/R600/test/CodeGen/Mips/tailcall.ll Tue Nov 13 09:21:47 2012
@@ -4,6 +4,8 @@
 ; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=STATIC32
 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -enable-mips-tail-calls \
 ; RUN: < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic \
+; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=PIC16
 
 @g0 = common global i32 0, align 4
 @g1 = common global i32 0, align 4
@@ -21,6 +23,7 @@
 ; PIC32-NOT: jalr
 ; STATIC32-NOT: jal
 ; N64-NOT: jalr
+; PIC16: jalrc
 
   %call = tail call i32 @callee1(i32 1, i32 1, i32 1, i32 %a0) nounwind
   ret i32 %call
@@ -33,6 +36,7 @@
 ; PIC32: jalr
 ; STATIC32: jal
 ; N64-NOT: jalr
+; PIC16: jalrc
 
   %call = tail call i32 @callee2(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
   ret i32 %call
@@ -45,6 +49,7 @@
 ; PIC32: jalr
 ; STATIC32: jal
 ; N64-NOT: jalr
+; PIC16: jalrc
 
   %call = tail call i32 @callee3(i32 1, i32 1, i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind
   ret i32 %call
@@ -57,6 +62,7 @@
 ; PIC32: jalr
 ; STATIC32: jal
 ; N64: jalr
+; PIC16: jalrc
 
   %call = tail call i32 @callee4(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind
   ret i32 %call
@@ -66,9 +72,18 @@
 
 define i32 @caller5() nounwind readonly {
 entry:
+; PIC32: .ent caller5
 ; PIC32-NOT: jalr
+; PIC32: .end caller5
+; STATIC32: .ent caller5
 ; STATIC32-NOT: jal
+; STATIC32: .end caller5
+; N64: .ent caller5
 ; N64-NOT: jalr
+; N64: .end caller5
+; PIC16: .ent caller5
+; PIC16: jalrc
+; PIC16: .end caller5
 
   %0 = load i32* @g0, align 4
   %1 = load i32* @g1, align 4
@@ -98,3 +113,133 @@
   ret i32 %add8
 }
 
+declare i32 @callee8(i32, ...)
+
+define i32 @caller8_0() nounwind {
+entry:
+  %call = tail call fastcc i32 @caller8_1()
+  ret i32 %call
+}
+
+define internal fastcc i32 @caller8_1() nounwind noinline {
+entry:
+; PIC32: .ent caller8_1
+; PIC32: jalr
+; PIC32: .end caller8_1
+; STATIC32: .ent caller8_1
+; STATIC32: jal
+; STATIC32: .end caller8_1
+; N64: .ent caller8_1
+; N64-NOT: jalr
+; N64: .end caller8_1
+; PIC16: .ent caller8_1
+; PIC16: jalrc
+; PIC16: .end caller8_1
+
+  %call = tail call i32 (i32, ...)* @callee8(i32 2, i32 1) nounwind
+  ret i32 %call
+}
+
+%struct.S = type { [2 x i32] }
+
+@gs1 = external global %struct.S
+
+declare i32 @callee9(%struct.S* byval)
+
+define i32 @caller9_0() nounwind {
+entry:
+  %call = tail call fastcc i32 @caller9_1()
+  ret i32 %call
+}
+
+define internal fastcc i32 @caller9_1() nounwind noinline {
+entry:
+; PIC32: .ent caller9_1
+; PIC32: jalr
+; PIC32: .end caller9_1
+; STATIC32: .ent caller9_1
+; STATIC32: jal
+; STATIC32: .end caller9_1
+; N64: .ent caller9_1
+; N64: jalr
+; N64: .end caller9_1
+; PIC16: .ent caller9_1
+; PIC16: jalrc
+; PIC16: .end caller9_1
+
+  %call = tail call i32 @callee9(%struct.S* byval @gs1) nounwind
+  ret i32 %call
+}
+
+declare i32 @callee10(i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+define i32 @caller10(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) nounwind {
+entry:
+; PIC32: .ent caller10
+; PIC32-NOT: jalr
+; STATIC32: .ent caller10
+; STATIC32-NOT: jal
+; N64: .ent caller10
+; N64-NOT: jalr
+; PIC16: .ent caller10
+; PIC16: jalrc
+
+  %call = tail call i32 @callee10(i32 %a8, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind
+  ret i32 %call
+}
+
+declare i32 @callee11(%struct.S* byval)
+
+define i32 @caller11() nounwind noinline {
+entry:
+; PIC32: .ent caller11
+; PIC32: jalr
+; STATIC32: .ent caller11
+; STATIC32: jal
+; N64: .ent caller11
+; N64: jalr
+; PIC16: .ent caller11
+; PIC16: jalrc
+
+  %call = tail call i32 @callee11(%struct.S* byval @gs1) nounwind
+  ret i32 %call
+}
+
+declare i32 @callee12()
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define i32 @caller12(%struct.S* nocapture byval %a0) nounwind {
+entry:
+; PIC32: .ent caller12
+; PIC32: jalr
+; STATIC32: .ent caller12
+; STATIC32: jal
+; N64: .ent caller12
+; N64: jalr
+; PIC16: .ent caller12
+; PIC16: jalrc
+
+  %0 = bitcast %struct.S* %a0 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct.S* @gs1 to i8*), i8* %0, i32 8, i32 4, i1 false)
+  %call = tail call i32 @callee12() nounwind
+  ret i32 %call
+}
+
+declare i32 @callee13(i32, ...)
+
+define i32 @caller13() nounwind {
+entry:
+; PIC32: .ent caller13
+; PIC32-NOT: jalr
+; STATIC32: .ent caller13
+; STATIC32-NOT: jal
+; N64: .ent caller13
+; N64-NOT: jalr
+; PIC16: .ent caller13
+; PIC16: jalrc
+
+  %call = tail call i32 (i32, ...)* @callee13(i32 1, i32 2) nounwind
+  ret i32 %call
+}
+

Added: llvm/branches/R600/test/CodeGen/Mips/tls16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/tls16.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/tls16.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/tls16.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,13 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16
+
+@a = thread_local global i32 4, align 4
+
+define i32 @foo() nounwind readonly {
+entry:
+  %0 = load i32* @a, align 4
+; PIC16:	lw	${{[0-9]+}}, %call16(__tls_get_addr)(${{[0-9]+}})
+; PIC16:	addiu	${{[0-9]+}}, %tlsgd(a)
+  ret i32 %0
+}
+
+

Added: llvm/branches/R600/test/CodeGen/Mips/tls16_2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/tls16_2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/tls16_2.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/tls16_2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,15 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16
+
+@f.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i8* @f(i8* nocapture %a) nounwind {
+entry:
+  %0 = load i32* @f.i, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @f.i, align 4
+  %1 = inttoptr i32 %inc to i8*
+; PIC16: addiu	${{[0-9]+}}, %tlsldm(f.i)
+  ret i8* %1
+}
+
+

Added: llvm/branches/R600/test/CodeGen/PowerPC/asm-Zy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/asm-Zy.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/asm-Zy.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/asm-Zy.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,14 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+
+define i32 @zytest(i32 %a) nounwind {
+entry:
+; CHECK: @zytest
+  %r = call i32 asm "lwbrx $0, ${1:y}", "=r,Z"(i32 %a) nounwind, !srcloc !0
+  ret i32 %r
+; CHECK: lwbrx 3, 0,
+}
+
+!0 = metadata !{i32 101688}
+

Modified: llvm/branches/R600/test/CodeGen/PowerPC/coalesce-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/coalesce-ext.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/coalesce-ext.ll (original)
+++ llvm/branches/R600/test/CodeGen/PowerPC/coalesce-ext.ll Tue Nov 13 09:21:47 2012
@@ -13,5 +13,6 @@
   store volatile i32 %D, i32* %P
   ; Reuse low bits of extended register, don't extend live range of SUM.
   ; CHECK: stw [[EXT]]
-  ret i32 %D
+  %R = add i32 %D, %D
+  ret i32 %R
 }

Added: llvm/branches/R600/test/CodeGen/PowerPC/emptystruct.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/emptystruct.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/emptystruct.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/emptystruct.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,51 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+; This tests correct handling of empty aggregate parameters and return values.
+; An empty parameter passed by value does not consume a protocol register or
+; a parameter save area doubleword.  An empty parameter passed by reference
+; is treated as any other pointer parameter.  An empty aggregate return value 
+; is treated as any other aggregate return value, passed via address as a 
+; hidden parameter in GPR3.  In this example, GPR3 contains the return value
+; address, GPR4 contains the address of e2, and e1 and e3 are not passed or
+; received.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.empty = type {}
+
+define void @callee(%struct.empty* noalias sret %agg.result, %struct.empty* byval %a1, %struct.empty* %a2, %struct.empty* byval %a3) nounwind {
+entry:
+  %a2.addr = alloca %struct.empty*, align 8
+  store %struct.empty* %a2, %struct.empty** %a2.addr, align 8
+  %0 = load %struct.empty** %a2.addr, align 8
+  %1 = bitcast %struct.empty* %agg.result to i8*
+  %2 = bitcast %struct.empty* %0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 0, i32 1, i1 false)
+  ret void
+}
+
+; CHECK: callee:
+; CHECK: std 4,
+; CHECK: std 3,
+; CHECK-NOT: std 5,
+; CHECK-NOT: std 6,
+; CHECK: blr
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define void @caller(%struct.empty* noalias sret %agg.result) nounwind {
+entry:
+  %e1 = alloca %struct.empty, align 1
+  %e2 = alloca %struct.empty, align 1
+  %e3 = alloca %struct.empty, align 1
+  call void @callee(%struct.empty* sret %agg.result, %struct.empty* byval %e1, %struct.empty* %e2, %struct.empty* byval %e3)
+  ret void
+}
+
+; CHECK: caller:
+; CHECK: addi 4,
+; CHECK: std 3,
+; CHECK-NOT: std 5,
+; CHECK-NOT: std 6,
+; CHECK: bl callee

Modified: llvm/branches/R600/test/CodeGen/PowerPC/int-fp-conv-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/int-fp-conv-1.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/int-fp-conv-1.ll (original)
+++ llvm/branches/R600/test/CodeGen/PowerPC/int-fp-conv-1.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=ppc64 | grep __floatditf
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+; CHECK-NOT: __floatditf
 
 define i64 @__fixunstfdi(ppc_fp128 %a) nounwind  {
 entry:

Added: llvm/branches/R600/test/CodeGen/PowerPC/jaggedstructs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/jaggedstructs.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/jaggedstructs.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/jaggedstructs.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,48 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+; This tests receiving and re-passing parameters consisting of structures
+; of size 3, 5, 6, and 7.  They are to be found/placed right-adjusted in
+; the parameter registers.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S3 = type { [3 x i8] }
+%struct.S5 = type { [5 x i8] }
+%struct.S6 = type { [6 x i8] }
+%struct.S7 = type { [7 x i8] }
+
+define void @test(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7) nounwind {
+entry:
+  call void @check(%struct.S3* byval %s3, %struct.S5* byval %s5, %struct.S6* byval %s6, %struct.S7* byval %s7)
+  ret void
+}
+
+; CHECK: std 6, 216(1)
+; CHECK: std 5, 208(1)
+; CHECK: std 4, 200(1)
+; CHECK: std 3, 192(1)
+; CHECK: lbz {{[0-9]+}}, 199(1)
+; CHECK: stb {{[0-9]+}}, 55(1)
+; CHECK: lhz {{[0-9]+}}, 197(1)
+; CHECK: sth {{[0-9]+}}, 53(1)
+; CHECK: lbz {{[0-9]+}}, 207(1)
+; CHECK: stb {{[0-9]+}}, 63(1)
+; CHECK: lwz {{[0-9]+}}, 203(1)
+; CHECK: stw {{[0-9]+}}, 59(1)
+; CHECK: lhz {{[0-9]+}}, 214(1)
+; CHECK: sth {{[0-9]+}}, 70(1)
+; CHECK: lwz {{[0-9]+}}, 210(1)
+; CHECK: stw {{[0-9]+}}, 66(1)
+; CHECK: lbz {{[0-9]+}}, 223(1)
+; CHECK: stb {{[0-9]+}}, 79(1)
+; CHECK: lhz {{[0-9]+}}, 221(1)
+; CHECK: sth {{[0-9]+}}, 77(1)
+; CHECK: lwz {{[0-9]+}}, 217(1)
+; CHECK: stw {{[0-9]+}}, 73(1)
+; CHECK: ld 6, 72(1)
+; CHECK: ld 5, 64(1)
+; CHECK: ld 4, 56(1)
+; CHECK: ld 3, 48(1)
+
+declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval)
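As an editorial aside (derived only from the CHECK lines above, not from the patch itself): the incoming register images are spilled to the doublewords at 192(1), so the 3-byte %struct.S3 that arrived in GPR3, being right-adjusted, occupies bytes 192 + (8 - 3) = 197 through 199, which is exactly what the lhz 197(1)/lbz 199(1) pair reloads. The outgoing copy is rebuilt the same way in the argument area at 48(1), giving 48 + 5 = 53 through 55 for the sth 53(1)/stb 55(1) stores, before the four doublewords are reloaded into GPRs 3-6 for the call to @check.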

Added: llvm/branches/R600/test/CodeGen/PowerPC/ppc64-abi-extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/ppc64-abi-extend.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/ppc64-abi-extend.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/ppc64-abi-extend.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,97 @@
+; Verify that i32 argument/return values are extended to i64
+
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@si = common global i32 0, align 4
+@ui = common global i32 0, align 4
+
+declare void @arg_si(i32 signext)
+declare void @arg_ui(i32 zeroext)
+
+declare signext i32 @ret_si()
+declare zeroext i32 @ret_ui()
+
+define void @pass_arg_si() nounwind {
+entry:
+  %0 = load i32* @si, align 4
+  tail call void @arg_si(i32 signext %0) nounwind
+  ret void
+}
+; CHECK: @pass_arg_si
+; CHECK: lwa 3,
+; CHECK: bl arg_si
+
+define void @pass_arg_ui() nounwind {
+entry:
+  %0 = load i32* @ui, align 4
+  tail call void @arg_ui(i32 zeroext %0) nounwind
+  ret void
+}
+; CHECK: @pass_arg_ui
+; CHECK: lwz 3,
+; CHECK: bl arg_ui
+
+define i64 @use_arg_si(i32 signext %x) nounwind readnone {
+entry:
+  %conv = sext i32 %x to i64
+  ret i64 %conv
+}
+; CHECK: @use_arg_si
+; CHECK: %entry
+; CHECK-NEXT: blr
+
+define i64 @use_arg_ui(i32 zeroext %x) nounwind readnone {
+entry:
+  %conv = zext i32 %x to i64
+  ret i64 %conv
+}
+; CHECK: @use_arg_ui
+; CHECK: %entry
+; CHECK-NEXT: blr
+
+define signext i32 @pass_ret_si() nounwind readonly {
+entry:
+  %0 = load i32* @si, align 4
+  ret i32 %0
+}
+; CHECK: @pass_ret_si
+; CHECK: lwa 3,
+; CHECK: blr
+
+define zeroext i32 @pass_ret_ui() nounwind readonly {
+entry:
+  %0 = load i32* @ui, align 4
+  ret i32 %0
+}
+; CHECK: @pass_ret_ui
+; CHECK: lwz 3,
+; CHECK: blr
+
+define i64 @use_ret_si() nounwind {
+entry:
+  %call = tail call signext i32 @ret_si() nounwind
+  %conv = sext i32 %call to i64
+  ret i64 %conv
+}
+; CHECK: @use_ret_si
+; CHECK: bl ret_si
+; This is to verify the return register (3) set up by the ret_si
+; call is passed on unmodified as return value of use_ret_si.
+; CHECK-NOT: 3
+; CHECK: blr
+
+define i64 @use_ret_ui() nounwind {
+entry:
+  %call = tail call zeroext i32 @ret_ui() nounwind
+  %conv = zext i32 %call to i64
+  ret i64 %conv
+}
+; CHECK: @use_ret_ui
+; CHECK: bl ret_ui
+; This is to verify the return register (3) set up by the ret_ui
+; call is passed on unmodified as return value of use_ret_ui.
+; CHECK-NOT: 3
+; CHECK: blr
+

Added: llvm/branches/R600/test/CodeGen/PowerPC/ppc64-align-long-double.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/ppc64-align-long-double.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/ppc64-align-long-double.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/ppc64-align-long-double.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+; Verify internal alignment of long double in a struct.  The double
+; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain
+; the long double.  Check that these are stored to proper locations
+; in the parameter save area and loaded from there for return in FPR1/2.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S = type { double, ppc_fp128 }
+
+define ppc_fp128 @test(%struct.S* byval %x) nounwind {
+entry:
+  %b = getelementptr inbounds %struct.S* %x, i32 0, i32 1
+  %0 = load ppc_fp128* %b, align 16
+  ret ppc_fp128 %0
+}
+
+; CHECK: std 6, 72(1)
+; CHECK: std 5, 64(1)
+; CHECK: std 4, 56(1)
+; CHECK: std 3, 48(1)
+; CHECK: lfd 1, 64(1)
+; CHECK: lfd 2, 72(1)
+
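A quick offset calculation, again read straight from the CHECK lines: the byval %struct.S is 32 bytes (double at offset 0, 8 bytes of padding to satisfy the 16-byte alignment of ppc_fp128, the long double at offsets 16-31), so its four doublewords land at 48, 56, 64 and 72 off r1 and arrive in GPRs 3-6, with GPR4 carrying only padding. The ppc_fp128 field therefore sits at 48 + 16 = 64, which is why the return value is reloaded with lfd 1, 64(1) and lfd 2, 72(1) into FPR1/FPR2.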

Added: llvm/branches/R600/test/CodeGen/PowerPC/pr12757.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/pr12757.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/pr12757.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/pr12757.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,14 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @__flt_rounds() nounwind {
+entry:
+  %0 = tail call i64 asm sideeffect "mffs $0", "=f"() nounwind
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK: @__flt_rounds
+; CHECK: mffs
+

Modified: llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll (original)
+++ llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll Tue Nov 13 09:21:47 2012
@@ -188,17 +188,13 @@
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13
 
-; CHECK: sldi 9, 9, 8
-; CHECK: sldi 8, 8, 16
-; CHECK: sldi 7, 7, 24
-; CHECK: sldi 5, 5, 40
-; CHECK: stw 6, 76(1)
-; CHECK: sth 4, 62(1)
-; CHECK: stb 3, 55(1)
 ; CHECK: std 9, 96(1)
 ; CHECK: std 8, 88(1)
 ; CHECK: std 7, 80(1)
+; CHECK: stw 6, 76(1)
 ; CHECK: std 5, 64(1)
+; CHECK: sth 4, 62(1)
+; CHECK: stb 3, 55(1)
 ; CHECK: lbz {{[0-9]+}}, 85(1)
 ; CHECK: lbz {{[0-9]+}}, 86(1)
 ; CHECK: lbz {{[0-9]+}}, 83(1)

Added: llvm/branches/R600/test/CodeGen/PowerPC/varargs-struct-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/varargs-struct-float.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/varargs-struct-float.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/varargs-struct-float.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.Sf1 = type { float }
+
+define void @foo(float inreg %s.coerce) nounwind {
+entry:
+  %s = alloca %struct.Sf1, align 4
+  %coerce.dive = getelementptr %struct.Sf1* %s, i32 0, i32 0
+  store float %s.coerce, float* %coerce.dive, align 1
+  %coerce.dive1 = getelementptr %struct.Sf1* %s, i32 0, i32 0
+  %0 = load float* %coerce.dive1, align 1
+  call void (i32, ...)* @testvaSf1(i32 1, float inreg %0)
+  ret void
+}
+
+; CHECK: stfs {{[0-9]+}}, 60(1)
+; CHECK: ld 4, 56(1)
+; CHECK: bl
+
+declare void @testvaSf1(i32, ...)

Modified: llvm/branches/R600/test/CodeGen/PowerPC/vec_cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/vec_cmp.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/vec_cmp.ll (original)
+++ llvm/branches/R600/test/CodeGen/PowerPC/vec_cmp.ll Tue Nov 13 09:21:47 2012
@@ -1,6 +1,9 @@
-; RUN: llc -mattr=+altivec < %s | FileCheck %s
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
 
-; Check vector comparisons using altivec.
+; Check vector comparisons using altivec. For non-native types, only a basic
+; comparison instruction check is done. For the altivec-supported types (v16i8,
+; v8i16, v4i32, and v4f32) all the comparison operators (==, !=, >, >=, <, <=)
+; are checked.
 
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
@@ -33,13 +36,105 @@
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-define <16 x i8> @v16si8_cmp(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+; Additional tests for v16i8 since it is an altivec native type
+
+define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
   %cmp = icmp eq <16 x i8> %x, %y
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK: v16si8_cmp:
-; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: v16si8_cmp_eq:
+; CHECK: vcmpequb 2, 2, 3
+
+define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK:     v16si8_cmp_ne:
+; CHECK:     vcmpequb [[RET:[0-9]+]], 2, 3
+; CHECK-NOR: vnor     2, [[RET]], [[RET]]
+
+define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK:      v16si8_cmp_le:
+; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+
+define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK:      v16ui8_cmp_le:
+; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+
+define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_lt:
+; CHECK: vcmpgtsb 2, 3, 2
+
+define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_lt:
+; CHECK: vcmpgtub 2, 3, 2
+
+define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16si8_cmp_gt:
+; CHECK: vcmpgtsb 2, 2, 3
+
+define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK: v16ui8_cmp_gt:
+; CHECK: vcmpgtub 2, 2, 3
+
+define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK:      v16si8_cmp_ge:
+; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+
+define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <16 x i8> %x, %y
+  %sext = sext <16 x i1> %cmp to <16 x i8>
+  ret <16 x i8> %sext
+}
+; CHECK:      v16ui8_cmp_ge:
+; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
 
 
 define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
@@ -70,13 +165,106 @@
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-define <8 x i16> @v8si16_cmp(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+; Additional tests for v8i16 since it is an altivec native type
+
+define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
   %cmp = icmp eq <8 x i16> %x, %y
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK: v8si16_cmp:
-; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: v8si16_cmp_eq:
+; CHECK: vcmpequh 2, 2, 3
+
+define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK:      v8si16_cmp_ne:
+; CHECK:      vcmpequh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <8 x i16> @v8si16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK:      v8si16_cmp_le:
+; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+
+define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK:      v8ui16_cmp_le:
+; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+
+define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_lt:
+; CHECK: vcmpgtsh 2, 3, 2
+
+define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_lt:
+; CHECK: vcmpgtuh 2, 3, 2
+
+define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8si16_cmp_gt:
+; CHECK: vcmpgtsh 2, 2, 3
+
+define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK: v8ui16_cmp_gt:
+; CHECK: vcmpgtuh 2, 2, 3
+
+define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK:      v8si16_cmp_ge:
+; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+
+define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <8 x i16> %x, %y
+  %sext = sext <8 x i1> %cmp to <8 x i16>
+  ret <8 x i16> %sext
+}
+; CHECK:      v8ui16_cmp_ge:
+; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
 
 
 define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
@@ -110,13 +298,106 @@
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-define <4 x i32> @v4si32_cmp(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+; Additional tests for v4si32 since it is an altivec native type
+
+define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
   %cmp = icmp eq <4 x i32> %x, %y
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK: v4si32_cmp:
-; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: v4si32_cmp_eq:
+; CHECK: vcmpequw 2, 2, 3
+
+define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ne <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK:      v4si32_cmp_ne:
+; CHECK:      vcmpequw [[RCMP:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RCMP]], [[RCMP]]
+
+define <4 x i32> @v4si32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sle <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK:      v4si32_cmp_le:
+; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+
+define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ule <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK:      v4ui32_cmp_le:
+; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+
+define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp slt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_lt:
+; CHECK: vcmpgtsw 2, 3, 2
+
+define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ult <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_lt:
+; CHECK: vcmpgtuw 2, 3, 2
+
+define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sgt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4si32_cmp_gt:
+; CHECK: vcmpgtsw 2, 2, 3
+
+define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp ugt <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK: v4ui32_cmp_gt:
+; CHECK: vcmpgtuw 2, 2, 3
+
+define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp sge <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK:      v4si32_cmp_ge:
+; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+
+define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+entry:
+  %cmp = icmp uge <4 x i32> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %sext
+}
+; CHECK:      v4ui32_cmp_ge:
+; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
+; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
 
 
 define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
@@ -168,15 +449,70 @@
 ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
-define <4 x float> @v4f32_cmp(<4 x float> %x, <4 x float> %y) nounwind readnone {
+; Additional tests for v4f32 since it is an altivec native type
+
+define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone {
 entry:
   %cmp = fcmp oeq <4 x float> %x, %y
   %sext = sext <4 x i1> %cmp to <4 x i32>
   %0 = bitcast <4 x i32> %sext to <4 x float>
   ret <4 x float> %0
 }
-; CHECK: v4f32_cmp:
-; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: v4f32_cmp_eq:
+; CHECK: vcmpeqfp 2, 2, 3
+
+define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp une <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK:      v4f32_cmp_ne:
+; CHECK:      vcmpeqfp [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_le(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ole <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK:      v4f32_cmp_le:
+; CHECK:      vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3
+; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2
+; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+
+define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp olt <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK: v4f32_cmp_lt:
+; CHECK: vcmpgtfp 2, 3, 2
+
+define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp oge <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK: v4f32_cmp_ge:
+; CHECK: vcmpgefp 2, 2, 3
+
+define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ogt <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK: v4f32_cmp_gt:
+; CHECK: vcmpgtfp 2, 2, 3
 
 
 define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone {
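As an aside, the (x == y) | (x > y) form that the new *_cmp_le/_cmp_ge tests check for can also be written directly against the AltiVec intrinsics. A minimal hand-written sketch, not part of this commit (the function name is made up; the intrinsics are the stock llvm.ppc.altivec ones):

define <4 x i32> @v4si32_cmp_ge_expanded(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
  ; x >= y is formed as (x == y) | (x > y): vcmpequw + vcmpgtsw + vor
  %eq = call <4 x i32> @llvm.ppc.altivec.vcmpequw(<4 x i32> %x, <4 x i32> %y)
  %gt = call <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32> %x, <4 x i32> %y)
  %ge = or <4 x i32> %gt, %eq
  ret <4 x i32> %ge
}

declare <4 x i32> @llvm.ppc.altivec.vcmpequw(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.vcmpgtsw(<4 x i32>, <4 x i32>) nounwind readnone

The ne tests are the complementary case: the vcmpequw result is inverted, which shows up as the vnor the CHECK lines look for.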

Added: llvm/branches/R600/test/CodeGen/PowerPC/vec_extload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/vec_extload.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/vec_extload.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/vec_extload.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,155 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+; Check vector extend load expansion with altivec enabled.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Altivec does not provide an sext instruction, so the extension is expanded
+; into a set of vector stores (stvx), per-byte load/sign-extend/store
+; sequences (lbz/stb), and a final vector load (lvx) that loads the
+; resulting extended vector.
+define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
+  %b = trunc <16 x i8> %a to <16 x i4>
+  %c = sext <16 x i4> %b to <16 x i8>
+  ret <16 x i8> %c
+}
+; CHECK: v16si8_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lbz
+; CHECK: stb
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; The zero extend uses a cleverer approach: a vector splat
+; and a logical and to set the higher bits to 0.
+define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) {
+  %b = trunc <16 x i8> %a to <16 x i4>
+  %c = zext <16 x i4> %b to <16 x i8>
+  ret <16 x i8> %c
+}
+; CHECK:      v16si8_zext_in_reg:
+; CHECK:      vspltisb [[VMASK:[0-9]+]], 15
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
+
+; Same as v16si8_sext_in_reg, but expands to halfword loads/stores (lhz/sth).
+define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
+  %b = trunc <8 x i16> %a to <8 x i8>
+  %c = sext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %c
+}
+; CHECK: v8si16_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lhz
+; CHECK: sth
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; Same as v8si16_sext_in_reg, but instead of creating the mask
+; with a splat, loads it from memory.
+define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) {
+  %b = trunc <8 x i16> %a to <8 x i8>
+  %c = zext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %c
+}
+; CHECK:      v8si16_zext_in_reg:
+; CHECK:      ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2)
+; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]]
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
+
+; Same as v16si8_sext_in_reg, but expands to halfword loads (lha) and
+; word stores (stw).
+define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
+  %b = trunc <4 x i32> %a to <4 x i16>
+  %c = sext <4 x i16> %b to <4 x i32>
+  ret <4 x i32> %c
+}
+; CHECK: v4si32_sext_in_reg:
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: lha
+; CHECK: stw
+; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+
+; Same as v8si16_sext_in_reg.
+define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
+  %b = trunc <4 x i32> %a to <4 x i16>
+  %c = zext <4 x i16> %b to <4 x i32>
+  ret <4 x i32> %c
+}
+; CHECK:      v4si32_zext_in_reg:
+; CHECK:      vspltisw [[VMASK:[0-9]+]], -16
+; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]]
+; CHECK-NEXT: vand 2, 2, [[VMASK]]
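The zero-extend-in-register cases above are really just a mask of the low bits, which is why they avoid the store/load sequence needed for sign extension. An equivalent hand-written form of v16si8_zext_in_reg (illustrative only, not from this commit; on pwr6 it lowers to the vspltisb 15 / vand pair the test checks):

define <16 x i8> @v16si8_zext_in_reg_as_mask(<16 x i8> %a) {
  ; keep only the low 4 bits of every byte, i.e. zext from i4 to i8 in place
  %c = and <16 x i8> %a, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  ret <16 x i8> %c
}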

Added: llvm/branches/R600/test/CodeGen/PowerPC/vec_sqrt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/vec_sqrt.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/vec_sqrt.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/vec_sqrt.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,71 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec,+fsqrt < %s | FileCheck %s
+
+; Check for vector sqrt expansion using floating-point types, since altivec
+; does not provide an fsqrt instruction for vectors.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %val)
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float> %val)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double> %val)
+
+define <2 x float> @v2f32_sqrt(<2 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <2 x float> @llvm.sqrt.v2f32 (<2 x float> %x)
+  ret <2 x float> %sqrt
+}
+; sqrt (<2 x float>) is promoted to sqrt (<4 x float>)
+; CHECK: v2f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x float> @v4f32_sqrt(<4 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %x)
+  ret <4 x float> %sqrt
+}
+; CHECK: v4f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <8 x float> @v8f32_sqrt(<8 x float> %x) nounwind readnone {
+entry:
+  %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %x)
+  ret <8 x float> %sqrt
+}
+; CHECK: v8f32_sqrt:
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x double> @v2f64_sqrt(<2 x double> %x) nounwind readnone {
+entry:
+  %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %x)
+  ret <2 x double> %sqrt
+}
+; CHECK: v2f64_sqrt:
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x double> @v4f64_sqrt(<4 x double> %x) nounwind readnone {
+entry:
+  %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %x)
+  ret <4 x double> %sqrt
+}
+; CHECK: v4f64_sqrt:
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
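For reference, a hand-written sketch (not part of the patch; the function name is invented) of what the expansion verified by these CHECK lines amounts to for the <2 x double> case: with no vector fsqrt available, the operation is scalarized into one fsqrt per element.

define <2 x double> @v2f64_sqrt_scalarized(<2 x double> %x) nounwind readnone {
entry:
  %e0 = extractelement <2 x double> %x, i32 0
  %e1 = extractelement <2 x double> %x, i32 1
  %s0 = call double @llvm.sqrt.f64(double %e0)   ; becomes one scalar fsqrt
  %s1 = call double @llvm.sqrt.f64(double %e1)   ; becomes the other fsqrt
  %r0 = insertelement <2 x double> undef, double %s0, i32 0
  %r1 = insertelement <2 x double> %r0, double %s1, i32 1
  ret <2 x double> %r1
}

declare double @llvm.sqrt.f64(double) nounwind readnone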

Removed: llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll (original)
+++ llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll (removed)
@@ -1,84 +0,0 @@
-; RUN: llc -march=sparc < %s | FileCheck %s
-
-; Check that all the switches turned into lookup tables by SimplifyCFG are
-; turned back into switches for targets that don't like lookup tables.
-
-@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1
-@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1
-@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1
-@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1
-@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
-@switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
-@switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
-@switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
-@switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
-
-define i32 @f(i32 %c)  {
-entry:
-  %switch.tableidx = sub i32 %c, 42
-  %0 = icmp ult i32 %switch.tableidx, 7
-  br i1 %0, label %switch.lookup, label %return
-
-switch.lookup:
-  %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
-  %switch.load = load i32* %switch.gep
-  ret i32 %switch.load
-
-return:
-  ret i32 15
-
-; CHECK: f:
-; CHECK: %switch.lookup
-; CHECK-NOT: sethi %hi(.Lswitch.table)
-}
-
-declare void @dummy(i8 signext, float)
-
-define void @h(i32 %x) {
-entry:
-  %switch.tableidx = sub i32 %x, 0
-  %0 = icmp ult i32 %switch.tableidx, 4
-  br i1 %0, label %switch.lookup, label %sw.epilog
-
-switch.lookup:
-  %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx
-  %switch.load = load i8* %switch.gep
-  %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx
-  %switch.load2 = load float* %switch.gep1
-  br label %sw.epilog
-
-sw.epilog:
-  %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
-  %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
-  call void @dummy(i8 signext %a.0, float %b.0)
-  ret void
-
-; CHECK: h:
-; CHECK: %switch.lookup
-; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
-; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
-}
-
-define i8* @foostring(i32 %x) {
-entry:
-  %switch.tableidx = sub i32 %x, 0
-  %0 = icmp ult i32 %switch.tableidx, 4
-  br i1 %0, label %switch.lookup, label %return
-
-switch.lookup:
-  %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
-  %switch.load = load i8** %switch.gep
-  ret i8* %switch.load
-
-return:
-  ret i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)
-
-; CHECK: foostring:
-; CHECK: %switch.lookup
-; CHECK-NOT: sethi %hi(.Lswitch.table3)
-}
-
-; CHECK-NOT: .Lswitch.table
-; CHECK-NOT: .Lswitch.table1
-; CHECK-NOT: .Lswitch.table2
-; CHECK-NOT: .Lswitch.table3

Modified: llvm/branches/R600/test/CodeGen/Thumb2/carry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Thumb2/carry.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Thumb2/carry.ll (original)
+++ llvm/branches/R600/test/CodeGen/Thumb2/carry.ll Tue Nov 13 09:21:47 2012
@@ -20,3 +20,16 @@
 	%tmp2 = sub i64 %tmp1, %b
 	ret i64 %tmp2
 }
+
+; rdar://12559385
+define i64 @f3(i32 %vi) {
+entry:
+; CHECK: f3:
+; CHECK: movw [[REG:r[0-9]+]], #36102
+; CHECK: sbcs r{{[0-9]+}}, [[REG]]
+    %v0 = zext i32 %vi to i64
+    %v1 = xor i64 %v0, -155057456198619
+    %v4 = add i64 %v1, 155057456198619
+    %v5 = add i64 %v4, %v1
+    ret i64 %v5
+}

Modified: llvm/branches/R600/test/CodeGen/Thumb2/thumb2-uxtb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Thumb2/thumb2-uxtb.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Thumb2/thumb2-uxtb.ll (original)
+++ llvm/branches/R600/test/CodeGen/Thumb2/thumb2-uxtb.ll Tue Nov 13 09:21:47 2012
@@ -128,9 +128,9 @@
 
 ; ARMv7M: test10
 ; ARMv7M: mov.w r1, #16253176
-; ARMv7M: mov.w r2, #458759
 ; ARMv7M: and.w r0, r1, r0, lsr #7
-; ARMv7M: and.w r1, r2, r0, lsr #5
+; ARMv7M: mov.w r1, #458759
+; ARMv7M: and.w r1, r1, r0, lsr #5
 ; ARMv7M: orrs r0, r1
 	%tmp1 = lshr i32 %p0, 7		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 16253176		; <i32> [#uses=2]

Modified: llvm/branches/R600/test/CodeGen/X86/2012-01-18-vbitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/2012-01-18-vbitcast.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/2012-01-18-vbitcast.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/2012-01-18-vbitcast.ll Tue Nov 13 09:21:47 2012
@@ -2,8 +2,8 @@
 
 ;CHECK: vcast
 define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
-;CHECK: pshufd
-;CHECK: pshufd
+;CHECK: pmovzxdq
+;CHECK: pmovzxdq
   %af = bitcast <2 x float> %a to <2 x i32>
   %bf = bitcast <2 x float> %b to <2 x i32>
   %x = sub <2 x i32> %af, %bf

Modified: llvm/branches/R600/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/2012-03-15-build_vector_wl.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/2012-03-15-build_vector_wl.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/2012-03-15-build_vector_wl.ll Tue Nov 13 09:21:47 2012
@@ -4,7 +4,7 @@
 define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
 entry:
   %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK: shufb
+; CHECK: pmovzxbd
   ret <4 x i8> %out
 ; CHECK: ret
 }

Modified: llvm/branches/R600/test/CodeGen/X86/2012-07-10-extload64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/2012-07-10-extload64.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/2012-07-10-extload64.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/2012-07-10-extload64.ll Tue Nov 13 09:21:47 2012
@@ -3,7 +3,7 @@
 ; CHECK: load_store
 define void @load_store(<4 x i16>* %in) {
 entry:
-; CHECK: movsd
+; CHECK: pmovzxwd
   %A27 = load <4 x i16>* %in, align 4
   %A28 = add <4 x i16> %A27, %A27
 ; CHECK: movlpd
@@ -27,6 +27,6 @@
 BB:
   %t = load <2 x i32>* %ptr
   ret <2 x i32> %t
-;CHECK: movsd
+;CHECK: pmovzxdq
 ;CHECK: ret
 }

Modified: llvm/branches/R600/test/CodeGen/X86/add-of-carry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/add-of-carry.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/add-of-carry.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/add-of-carry.ll Tue Nov 13 09:21:47 2012
@@ -30,4 +30,17 @@
   ret i32 %z.0
 }
 
+; <rdar://problem/12579915>
+define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+entry:
+  %cmp = icmp ugt i32 %x, %y
+  %dec = sext i1 %cmp to i32
+  %dec.res = add nsw i32 %dec, %res
+  ret i32 %dec.res
+; CHECK: test3:
+; CHECK: cmpl
+; CHECK: sbbl
+; CHECK: ret
+}
+
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone

Added: llvm/branches/R600/test/CodeGen/X86/atom-shuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atom-shuf.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atom-shuf.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/atom-shuf.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=atom | FileCheck %s
+
+define <16 x i8> @foo(<16 x i8> %in) {
+  %r = shufflevector <16 x i8> %in, <16 x i8> undef, <16 x i32> < i32 7, i32 3, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i8> %r
+; CHECK: foo
+; CHECK: pshufb
+; CHECK-NEXT: ret
+}

Added: llvm/branches/R600/test/CodeGen/X86/avx-intel-ocl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/avx-intel-ocl.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/avx-intel-ocl.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/avx-intel-ocl.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,107 @@
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s
+
+declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
+declare <16 x float> @func_float16(<16 x float>, <16 x float>)
+; WIN64: testf16_inp
+; WIN64: vaddps  {{.*}}, {{%ymm[0-1]}}
+; WIN64: vaddps  {{.*}}, {{%ymm[0-1]}}
+; WIN64: leaq    {{.*}}(%rsp), %rcx
+; WIN64: call
+; WIN64: ret
+
+; WIN32: testf16_inp
+; WIN32: movl    %eax, (%esp)
+; WIN32: vaddps  {{.*}}, {{%ymm[0-1]}}
+; WIN32: vaddps  {{.*}}, {{%ymm[0-1]}}
+; WIN32: call
+; WIN32: ret
+
+; NOT_WIN: testf16_inp
+; NOT_WIN: vaddps  {{.*}}, {{%ymm[0-1]}}
+; NOT_WIN: vaddps  {{.*}}, {{%ymm[0-1]}}
+; NOT_WIN: leaq    {{.*}}(%rsp), %rdi
+; NOT_WIN: call
+; NOT_WIN: ret
+
+;test calling conventions - input parameters
+define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+  %y = alloca <16 x float>, align 16
+  %x = fadd <16 x float> %a, %b
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
+  %2 = load <16 x float>* %y, align 16
+  %3 = fadd <16 x float> %2, %1
+  ret <16 x float> %3
+}
+
+;test calling conventions - preserved registers
+
+; preserved ymm6-ymm15
+; WIN64: testf16_regs
+; WIN64: call
+; WIN64: vaddps  {{%ymm[6-7]}}, %ymm0, %ymm0
+; WIN64: vaddps  {{%ymm[6-7]}}, %ymm1, %ymm1
+; WIN64: ret
+
+; preserved ymm8-ymm15
+; NOT_WIN: testf16_regs
+; NOT_WIN: call
+; NOT_WIN: vaddps  {{%ymm[8-9]}}, %ymm0, %ymm0
+; NOT_WIN: vaddps  {{%ymm[8-9]}}, %ymm1, %ymm1
+; NOT_WIN: ret
+
+define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+  %y = alloca <16 x float>, align 16
+  %x = fadd <16 x float> %a, %b
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
+  %2 = load <16 x float>* %y, align 16
+  %3 = fadd <16 x float> %1, %b
+  %4 = fadd <16 x float> %2, %3
+  ret <16 x float> %4
+}
+
+; test calling conventions - prolog and epilog
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64: call
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp)  ## 32-byte Spill
+; NOT_WIN: call
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+   %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
+   ret <16 x float> %c
+}

Modified: llvm/branches/R600/test/CodeGen/X86/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/crash.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/crash.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/crash.ll Tue Nov 13 09:21:47 2012
@@ -580,3 +580,12 @@
 bb29:                                             ; preds = %bb28, %bb26, %bb25, %bb21
   unreachable
 }
+
+define void @pr14194() nounwind uwtable {
+  %tmp = load i64* undef, align 16
+  %tmp1 = trunc i64 %tmp to i32
+  %tmp2 = lshr i64 %tmp, 32
+  %tmp3 = trunc i64 %tmp2 to i32
+  %tmp4 = call { i32, i32 } asm sideeffect "", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(i32 %tmp3, i32 undef, i32 %tmp3, i32 %tmp1) nounwind
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/X86/cvtv2f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/cvtv2f32.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/cvtv2f32.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/cvtv2f32.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s
+
+define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) {
+  %t1 = uitofp i32 %x to float
+  %t2 = insertelement <2 x float> undef, float %t1, i32 0
+  %t3 = uitofp i32 %y to float
+  %t4 = insertelement <2 x float> %t2, float %t3, i32 1
+  %t5 = fmul <2 x float> %v, %t4
+  ret <2 x float> %t5
+; CHECK: foo
+; CHECK: or
+; CHECK: subpd
+; CHECK: cvtpd2ps
+; CHECK: ret
+}
+
+define <2 x float> @bar(<2 x i32> %in) {
+  %r = uitofp <2 x i32> %in to <2 x float>
+  ret <2 x float> %r
+; CHECK: bar
+; CHECK: or
+; CHECK: subpd
+; CHECK: cvtpd2ps
+; CHECK: ret
+}
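
The or/subpd/cvtpd2ps sequence the new CHECK lines expect is the usual magic-constant conversion: OR the 32-bit integer into the mantissa of 2^52, subtract 2^52, and narrow the exact double result to float. A minimal scalar C sketch of that trick (hypothetical helper, not taken from this commit):

#include <stdint.h>
#include <string.h>

/* 0x4330000000000000 is the bit pattern of the double 2^52, so OR-ing x into
   the low mantissa bits yields exactly 2^52 + x; subtracting 2^52 recovers x
   as a double, and the final cast rounds it to float. */
static float u32_to_float_via_double(uint32_t x) {
  uint64_t bits = 0x4330000000000000ULL | (uint64_t)x;
  double d;
  memcpy(&d, &bits, sizeof d);
  return (float)(d - 4503599627370496.0); /* 2^52 */
}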

Modified: llvm/branches/R600/test/CodeGen/X86/fast-cc-callee-pops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/fast-cc-callee-pops.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/fast-cc-callee-pops.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/fast-cc-callee-pops.ll Tue Nov 13 09:21:47 2012
@@ -2,12 +2,12 @@
 
 ; Check that a fastcc function pops its stack variables before returning.
 
-define x86_fastcallcc void @func(i64 %X, i64 %Y, float %G, double %Z) nounwind {
+define x86_fastcallcc void @func(i64 inreg %X, i64 %Y, float %G, double %Z) nounwind {
         ret void
 ; CHECK: ret{{.*}}20
 }
 
-define x86_thiscallcc void @func2(i32 %X, i64 %Y, float %G, double %Z) nounwind {
+define x86_thiscallcc void @func2(i32 inreg %X, i64 %Y, float %G, double %Z) nounwind {
         ret void
 ; CHECK: ret{{.*}}20
 }

Modified: llvm/branches/R600/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/fast-cc-merge-stack-adj.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/fast-cc-merge-stack-adj.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/fast-cc-merge-stack-adj.ll Tue Nov 13 09:21:47 2012
@@ -3,7 +3,7 @@
 
 target triple = "i686-pc-linux-gnu"
 
-declare x86_fastcallcc void @func(i32*, i64)
+declare x86_fastcallcc void @func(i32*, i64 inreg)
 
 define x86_fastcallcc void @caller(i32, i64) {
         %X = alloca i32         ; <i32*> [#uses=1]

Modified: llvm/branches/R600/test/CodeGen/X86/fast-cc-pass-in-regs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/fast-cc-pass-in-regs.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/fast-cc-pass-in-regs.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/fast-cc-pass-in-regs.ll Tue Nov 13 09:21:47 2012
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
 ; check that fastcc is passing stuff in regs.
 
-declare x86_fastcallcc i64 @callee(i64)
+declare x86_fastcallcc i64 @callee(i64 inreg)
 
 define i64 @caller() {
         %X = call x86_fastcallcc  i64 @callee( i64 4294967299 )          ; <i64> [#uses=1]
@@ -9,7 +9,7 @@
         ret i64 %X
 }
 
-define x86_fastcallcc i64 @caller2(i64 %X) {
+define x86_fastcallcc i64 @caller2(i64 inreg %X) {
         ret i64 %X
 ; CHECK: mov{{.*}}EAX, ECX
 }

Modified: llvm/branches/R600/test/CodeGen/X86/fp-fast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/fp-fast.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/fp-fast.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/fp-fast.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mattr=-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
 
 ; CHECK: test1
 define float @test1(float %a) {
@@ -35,3 +35,23 @@
   ret float %r
 }
 
+; CHECK: test4
+define float @test4(float %a) {
+; CHECK-NOT: fma
+; CHECK-NOT: mul
+; CHECK-NOT: add
+; CHECK: ret
+  %t1 = fmul float %a, 0.0
+  %t2 = fadd float %a, %t1
+  ret float %t2
+}
+
+; CHECK: test5
+define float @test5(float %a) {
+; CHECK-NOT: add
+; CHECK: vxorps
+; CHECK: ret
+  %t1 = fsub float -0.0, %a
+  %t2 = fadd float %a, %t1
+  ret float %t2
+}

Added: llvm/branches/R600/test/CodeGen/X86/inlineasm-sched-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/inlineasm-sched-bug.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/inlineasm-sched-bug.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/inlineasm-sched-bug.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,13 @@
+; PR13504
+; RUN: llc -march=x86 -mcpu=atom <%s | FileCheck %s
+; CHECK: bsfl
+; CHECK-NOT: movl
+
+define i32 @foo(i32 %treemap) nounwind uwtable {
+entry:
+  %sub = sub i32 0, %treemap
+  %and = and i32 %treemap, %sub
+  %0 = tail call i32 asm "bsfl $1,$0\0A\09", "=r,rm,~{dirflag},~{fpsr},~{flags}"(i32 %and) nounwind
+  ret i32 %0
+}
+
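
The IR above hands bsfl the value treemap & -treemap, the standard idiom for isolating the lowest set bit. A scalar C sketch of the same computation (hypothetical helper, not taken from this commit):

/* x & -x clears every bit except the lowest one that is set; counting its
   trailing zeros (what bsf does) gives that bit's index.  Undefined for x == 0. */
static unsigned lowest_set_bit_index(unsigned x) {
  unsigned isolated = x & -x;               /* e.g. 0b101000 -> 0b001000 */
  return (unsigned)__builtin_ctz(isolated); /* GCC/Clang builtin, maps to bsf */
}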

Modified: llvm/branches/R600/test/CodeGen/X86/jump_sign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/jump_sign.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/jump_sign.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/jump_sign.ll Tue Nov 13 09:21:47 2012
@@ -219,7 +219,6 @@
 ; by sbb, we should not optimize cmp away.
 define i32 @q(i32 %j.4, i32 %w, i32 %el) {
 ; CHECK: q:
-; CHECK: sub
 ; CHECK: cmp
 ; CHECK-NEXT: sbb
   %tmp532 = add i32 %j.4, %w

Added: llvm/branches/R600/test/CodeGen/X86/misched-balance.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/misched-balance.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/misched-balance.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/misched-balance.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,230 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
+; RUN:          -verify-machineinstrs | FileCheck %s
+;
+; Verify that misched resource/latency balancing heuristics are sane.
+
+define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
+  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
+ i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
+  nounwind uwtable ssp {
+entry:
+  br label %for.body
+
+; imull folded loads should be in order and interleaved with addl, never
+; adjacent. Also check that we have no spilling.
+;
+; Since mmult1 IR is already in good order, this effectively ensures
+; the scheduler maintains source order.
+;
+; CHECK: %for.body
+; CHECK-NOT: %rsp
+; CHECK: imull 4
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 8
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 12
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 16
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 20
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 24
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 28
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 32
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 36
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: %end
+for.body:
+  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
+  %tmp57 = load i32* %tmp56, align 4
+  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
+  %tmp58 = load i32* %arrayidx12.us.i61, align 4
+  %mul.us.i = mul nsw i32 %tmp58, %tmp57
+  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
+  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
+  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
+  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
+  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
+  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
+  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
+  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
+  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
+  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
+  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
+  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
+  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
+  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
+  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
+  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
+  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
+  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
+  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
+  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
+  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
+  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
+  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
+  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
+  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
+  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
+  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
+  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
+  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
+  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
+  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
+  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
+  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
+  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
+  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
+  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
+  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
+  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
+  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
+  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
+  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
+  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
+  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
+  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
+  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
+  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
+  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
+  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
+  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
+  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
+  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
+  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
+  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
+  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
+  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
+  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
+  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10
+  br i1 %exitcond, label %end, label %for.body
+
+end:
+  ret void
+}
+
+; Unlike the above loop, this IR starts out bad and must be
+; rescheduled.
+;
+; CHECK: %for.body
+; CHECK-NOT: %rsp
+; CHECK: imull 4
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 8
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 12
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 16
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 20
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 24
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 28
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 32
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK: imull 36
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: addl
+; CHECK-NOT: {{imull|rsp}}
+; CHECK: %end
+define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
+  i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
+  i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
+  nounwind uwtable ssp {
+entry:
+  br label %for.body
+for.body:
+  %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
+  %tmp57 = load i32* %tmp56, align 4
+  %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
+  %tmp58 = load i32* %arrayidx12.us.i61, align 4
+  %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
+  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
+  %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
+  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
+  %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
+  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
+  %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
+  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
+  %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
+  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
+  %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
+  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
+  %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
+  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
+  %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
+  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
+  %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
+  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
+  %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
+  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
+  %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
+  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
+  %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
+  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
+  %mul.us.i = mul nsw i32 %tmp58, %tmp57
+  %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
+  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
+  %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
+  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
+  %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
+  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
+  %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
+  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
+  %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
+  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
+  %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
+  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
+  %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
+  %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
+  %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
+  %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
+  %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
+  %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
+  %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
+  %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
+  %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
+  %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
+  %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
+  %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
+  %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
+  %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
+  %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
+  %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
+  %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
+  %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
+  %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
+  store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
+  %indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10
+  br i1 %exitcond, label %end, label %for.body
+
+end:
+  ret void
+}

Modified: llvm/branches/R600/test/CodeGen/X86/mmx-builtins.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/mmx-builtins.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/mmx-builtins.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/mmx-builtins.ll Tue Nov 13 09:21:47 2012
@@ -1043,6 +1043,20 @@
   ret i64 %5
 }
 
+define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: test21_2
+; CHECK: pshufw
+; CHECK: movd
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %1 = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <2 x i32>
+  %5 = extractelement <2 x i32> %4, i32 0
+  ret i32 %5
+}
+
 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
 
 define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {

Modified: llvm/branches/R600/test/CodeGen/X86/ms-inline-asm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/ms-inline-asm.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/ms-inline-asm.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/ms-inline-asm.ll Tue Nov 13 09:21:47 2012
@@ -38,3 +38,26 @@
 ; CHECK: .att_syntax
 ; CHECK: {{## InlineAsm End|#NO_APP}}
 }
+
+%struct.t18_type = type { i32, i32 }
+
+define i32 @t18() nounwind {
+entry:
+  %foo = alloca %struct.t18_type, align 4
+  %a = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 0
+  store i32 1, i32* %a, align 4
+  %b = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1
+  store i32 2, i32* %b, align 4
+  call void asm sideeffect inteldialect "lea ebx, foo\0A\09mov eax, [ebx].0\0A\09mov [ebx].4, ecx", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind
+  %b1 = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1
+  %0 = load i32* %b1, align 4
+  ret i32 %0
+; CHECK: t18
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: lea ebx, foo
+; CHECK: mov eax, [ebx].0
+; CHECK: mov [ebx].4, ecx
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+}

Modified: llvm/branches/R600/test/CodeGen/X86/pointer-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/pointer-vector.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/pointer-vector.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/pointer-vector.ll Tue Nov 13 09:21:47 2012
@@ -81,8 +81,7 @@
 entry:
   %G = load <4 x i8>* %p
 ;CHECK: movl
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxbd
 ;CHECK: pand
   %K = inttoptr <4 x i8> %G to <4 x i32*>
 ;CHECK: ret
@@ -105,7 +104,7 @@
 entry:
   %G = load <2 x i8*>* %p
 ;CHECK: movl
-;CHECK: movsd
+;CHECK: pmovzxdq
   %T = bitcast <2 x i8*> %G to <2 x i32*>
 ;CHECK: ret
   ret <2 x i32*> %T

Added: llvm/branches/R600/test/CodeGen/X86/pr14161.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/pr14161.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/pr14161.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/pr14161.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=corei7 | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>)
+
+define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
+entry:
+  %2 = load <4 x i32>* %0, align 16
+  %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+  %4 = extractelement <4 x i32> %3, i32 0
+  %5 = extractelement <4 x i32> %3, i32 1
+  %6 = extractelement <4 x i32> %3, i32 2
+  %7 = extractelement <4 x i32> %3, i32 3
+  %8 = bitcast i32 %4 to <2 x i16>
+  %9 = bitcast i32 %5 to <2 x i16>
+  ret <2 x i16> %8
+; CHECK: good
+; CHECK: pminud
+; CHECK-NEXT: pmovzxwq
+; CHECK: ret
+}
+
+define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) {
+entry:
+  %2 = load <4 x i32>* %0, align 16
+  %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+  %4 = extractelement <4 x i32> %3, i32 0
+  %5 = extractelement <4 x i32> %3, i32 1
+  %6 = extractelement <4 x i32> %3, i32 2
+  %7 = extractelement <4 x i32> %3, i32 3
+  %8 = bitcast i32 %4 to <2 x i16>
+  %9 = bitcast i32 %5 to <2 x i16>
+  ret <2 x i16> %9
+; CHECK: bad
+; CHECK: pminud
+; CHECK: pextrd
+; CHECK: pmovzxwq
+; CHECK: ret
+}

Added: llvm/branches/R600/test/CodeGen/X86/pr14204.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/pr14204.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/pr14204.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/pr14204.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=core-avx2 | FileCheck %s
+
+; FIXME: vpmovsxwd should be generated instead of vpmovzxwd followed by
+; SLL/SRA.
+
+define <8 x i32> @foo(<8 x i1> %bar) nounwind readnone {
+entry:
+  %s = sext <8 x i1> %bar to <8 x i32>
+  ret <8 x i32> %s
+; CHECK: foo
+; CHECK: vpmovzxwd
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: ret
+}
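
The vpslld/vpsrad pair the test currently expects is the shift-based sign extension the FIXME mentions; per 32-bit lane it behaves like this scalar C sketch (hypothetical helper, not taken from this commit; assumes an arithmetic right shift of negative ints, as x86 compilers provide):

/* Move the low bit into the sign position, then arithmetic-shift it back so it
   fills the whole lane: 0 stays 0, 1 becomes -1 (all ones). */
static int sign_extend_bit0(unsigned lane) {
  return ((int)(lane << 31)) >> 31;
}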

Modified: llvm/branches/R600/test/CodeGen/X86/promote.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/promote.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/promote.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/promote.ll Tue Nov 13 09:21:47 2012
@@ -20,7 +20,7 @@
 ; CHECK: shuff_f
 define i32 @shuff_f(<4 x i8>* %A) {
 entry:
-; CHECK: pshufb
+; CHECK: pmovzxbd
 ; CHECK: paddd
 ; CHECK: pshufb
   %0 = load <4 x i8>* %A, align 8

Added: llvm/branches/R600/test/CodeGen/X86/sse-intel-ocl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/sse-intel-ocl.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/sse-intel-ocl.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/sse-intel-ocl.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck -check-prefix=NOT_WIN %s
+
+declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
+declare <16 x float> @func_float16(<16 x float>, <16 x float>)
+; WIN64: testf16_inp
+; WIN64: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN64: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN64: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN64: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN64: leaq    {{.*}}(%rsp), %rcx
+; WIN64: call
+; WIN64: ret
+
+; WIN32: testf16_inp
+; WIN32: movl    %eax, (%esp)
+; WIN32: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN32: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN32: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN32: addps  {{.*}}, {{%xmm[0-3]}}
+; WIN32: call
+; WIN32: ret
+
+; NOT_WIN: testf16_inp
+; NOT_WIN: addps  {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: addps  {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: addps  {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: addps  {{.*}}, {{%xmm[0-3]}}
+; NOT_WIN: leaq    {{.*}}(%rsp), %rdi
+; NOT_WIN: call
+; NOT_WIN: ret
+
+; test calling conventions - input parameters
+define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+  %y = alloca <16 x float>, align 16
+  %x = fadd <16 x float> %a, %b
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
+  %2 = load <16 x float>* %y, align 16
+  %3 = fadd <16 x float> %2, %1
+  ret <16 x float> %3
+}
+
+; test calling conventions - preserved registers
+
+; preserved xmm6-xmm15
+; WIN64: testf16_regs
+; WIN64: call
+; WIN64: addps  {{%xmm[6-9]}}, {{.*}}
+; WIN64: addps  {{%xmm[6-9]}}, {{.*}}
+; WIN64: ret
+
+; preserved xmm8-xmm15
+; NOT_WIN: testf16_regs
+; NOT_WIN: call
+; NOT_WIN: addps  {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: addps  {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: addps  {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: addps  {{%xmm([8-9]|1[0-1])}}, {{.*}}
+; NOT_WIN: ret
+
+define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+  %y = alloca <16 x float>, align 16
+  %x = fadd <16 x float> %a, %b
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
+  %2 = load <16 x float>* %y, align 16
+  %3 = fadd <16 x float> %1, %b
+  %4 = fadd <16 x float> %2, %3
+  ret <16 x float> %4
+}
+
+; test calling conventions - prolog and epilog
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}}  ## 16-byte Spill
+; NOT_WIN: call
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}}  ## 16-byte Reload
+define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+   %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
+   ret <16 x float> %c
+}

Added: llvm/branches/R600/test/CodeGen/X86/sse_partial_update.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/sse_partial_update.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/sse_partial_update.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/sse_partial_update.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -mcpu=nehalem | FileCheck %s
+
+; rdar: 12558838
+; PR14221
+; There is a mismatch between the intrinsic and the actual instruction.
+; The actual instruction has a partial update of dest, while the intrinsic
+; passes through the upper FP values. Here, we make sure the source and
+; destination of rsqrtss are the same.
+define void @t1(<4 x float> %a) nounwind uwtable ssp {
+entry:
+; CHECK: t1:
+; CHECK: rsqrtss %xmm0, %xmm0
+  %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
+  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
+  %conv = fpext float %a.addr.0.extract to double
+  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
+  %conv3 = fpext float %a.addr.4.extract to double
+  tail call void @callee(double %conv, double %conv3) nounwind
+  ret void
+}
+declare void @callee(double, double)
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+
+define void @t2(<4 x float> %a) nounwind uwtable ssp {
+entry:
+; CHECK: t2:
+; CHECK: rcpss %xmm0, %xmm0
+  %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
+  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
+  %conv = fpext float %a.addr.0.extract to double
+  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
+  %conv3 = fpext float %a.addr.4.extract to double
+  tail call void @callee(double %conv, double %conv3) nounwind
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
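
At the intrinsics level, the pass-through behaviour the comment above describes looks like the following C sketch (hypothetical helper, not taken from this commit):

#include <xmmintrin.h>

/* _mm_rsqrt_ss computes an approximate reciprocal square root of the low lane
   only and, by its definition, carries lanes 1-3 over from its operand; the
   rsqrtss instruction instead leaves the destination's upper lanes untouched,
   which is why the test requires the same register for source and destination. */
static __m128 rsqrt_low_lane(__m128 a) {
  return _mm_rsqrt_ss(a);
}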

Modified: llvm/branches/R600/test/CodeGen/X86/trunc-ext-ld-st.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/trunc-ext-ld-st.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/trunc-ext-ld-st.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/trunc-ext-ld-st.ll Tue Nov 13 09:21:47 2012
@@ -2,8 +2,7 @@
 
 ;CHECK: load_2_i8
 ; A single 16-bit load
-;CHECK: movzwl
-;CHECK: pshufb
+;CHECK: pmovzxbq
 ;CHECK: paddq
 ;CHECK: pshufb
 ; A single 16-bit store
@@ -19,8 +18,7 @@
 
 ;CHECK: load_2_i16
 ; Read 32-bits
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxwq
 ;CHECK: paddq
 ;CHECK: pshufb
 ;CHECK: movd
@@ -33,7 +31,7 @@
 } 
 
 ;CHECK: load_2_i32
-;CHECK: pshufd
+;CHECK: pmovzxdq
 ;CHECK: paddq
 ;CHECK: pshufd
 ;CHECK: ret
@@ -45,8 +43,7 @@
 } 
 
 ;CHECK: load_4_i8
-;CHECK: movd
-;CHECK: pshufb
+;CHECK: pmovzxbd
 ;CHECK: paddd
 ;CHECK: pshufb
 ;CHECK: ret
@@ -58,7 +55,7 @@
 } 
 
 ;CHECK: load_4_i16
-;CHECK: punpcklwd
+;CHECK: pmovzxwd
 ;CHECK: paddd
 ;CHECK: pshufb
 ;CHECK: ret
@@ -70,7 +67,7 @@
 } 
 
 ;CHECK: load_8_i8
-;CHECK: punpcklbw
+;CHECK: pmovzxbw
 ;CHECK: paddw
 ;CHECK: pshufb
 ;CHECK: ret
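
Each pmovzx* pattern the updated CHECK lines expect folds the old movd/punpck/pshufb widening into a single SSE4.1 zero extension; pmovzxbd, for instance, corresponds to this intrinsics-level C sketch (hypothetical helper, not taken from this commit):

#include <smmintrin.h>  /* SSE4.1; compile with -msse4.1 */

/* Zero-extend the four low bytes of a vector into four 32-bit lanes. */
static __m128i widen_4_u8_to_u32(__m128i bytes) {
  return _mm_cvtepu8_epi32(bytes);
}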

Modified: llvm/branches/R600/test/CodeGen/X86/vec_compare-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/vec_compare-2.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/vec_compare-2.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/vec_compare-2.ll Tue Nov 13 09:21:47 2012
@@ -10,8 +10,7 @@
 entry:
 ; CHECK: cfi_def_cfa_offset
 ; CHECK-NOT: set
-; CHECK: punpcklwd
-; CHECK: pshufd
+; CHECK: pmovzxwq
 ; CHECK: pshufb
   %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
   %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]

Modified: llvm/branches/R600/test/CodeGen/X86/vec_shuffle-26.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/vec_shuffle-26.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/vec_shuffle-26.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/vec_shuffle-26.ll Tue Nov 13 09:21:47 2012
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse41 | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=atom -mattr=+sse41 | FileCheck -check-prefix=ATOM %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
 
 ; Transpose example using the more generic vector shuffle. Return float8
 ; instead of float16
@@ -47,8 +47,8 @@
 ; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
 ; ATOM: lo_hi_shift
 ; ATOM: movhps ([[BASEREG:%[a-z]+]]),
-; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
-; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
+; ATOM: movd %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
+; ATOM: movd %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
   %v.i = bitcast float* %y to <4 x float>*
   %0 = load <4 x float>* %v.i, align 1
   %1 = bitcast float* %x to <1 x i64>*

Modified: llvm/branches/R600/test/CodeGen/X86/vec_shuffle-30.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/vec_shuffle-30.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/vec_shuffle-30.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/vec_shuffle-30.ll Tue Nov 13 09:21:47 2012
@@ -1,21 +1,25 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
-; RUN: grep pshufhw %t | grep -- -95 | count 1
-; RUN: grep shufps %t | count 1
-; RUN: not grep pslldq %t
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 
+; CHECK: test
 ; Test case when creating pshufhw, we incorrectly set the higher order bit
 ; for an undef,
 define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind {
 entry:
+; CHECK-NOT: vmovaps
+; CHECK: vmovlpd
+; CHECK: vpshufhw        $-95
   %0 = load <8 x i16>* %dest
   %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14>
   store <8 x i16> %1, <8 x i16>* %dest
   ret void
-}                              
+}
 
+; CHECK: test2
 ; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq
 define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind {
 entry:
+; CHECK-NOT: pslldq
+; CHECK: shufps
   %0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
   store <4 x i32> %0, <4 x i32>* %dest
   ret void

Modified: llvm/branches/R600/test/CodeGen/X86/widen_cast-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/widen_cast-1.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/widen_cast-1.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/widen_cast-1.ll Tue Nov 13 09:21:47 2012
@@ -1,5 +1,5 @@
 ; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
-; RUN: llc -march=x86 -mcpu=atom -mattr=+sse42 < %s | FileCheck -check-prefix=ATOM %s
+; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
 ; CHECK: paddd
 ; CHECK: movl

Modified: llvm/branches/R600/test/CodeGen/X86/widen_load-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/widen_load-2.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/widen_load-2.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/widen_load-2.ll Tue Nov 13 09:21:47 2012
@@ -170,7 +170,7 @@
 ; CHECK: rot
 %i8vec3pack = type { <3 x i8>, i8 }
 define %i8vec3pack  @rot() nounwind {
-; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
+; CHECK: pmovzxbd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
 entry:
   %X = alloca %i8vec3pack, align 4
   %rot = alloca %i8vec3pack, align 4

Added: llvm/branches/R600/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll (added)
+++ llvm/branches/R600/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,32 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+
+; This test checks that common symbols have been allocated addresses honouring
+; the alignment requirement.
+
+@CS1 = common global i32 0, align 16
+@CS2 = common global i8 0, align 1
+@CS3 = common global i32 0, align 16
+
+define i32 @main() nounwind {
+entry:
+    %retval = alloca i32, align 4
+    %ptr = alloca i32, align 4
+    store i32 0, i32* %retval
+    store i32 ptrtoint (i32* @CS3 to i32), i32* %ptr, align 4
+    %0 = load i32* %ptr, align 4
+    %and = and i32 %0, 15
+    %tobool = icmp ne i32 %and, 0
+    br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+    store i32 1, i32* %retval
+    br label %return
+
+if.else:                                          ; preds = %entry
+    store i32 0, i32* %retval
+    br label %return
+
+return:                                           ; preds = %if.else, %if.then
+    %1 = load i32* %retval
+    ret i32 %1
+}

Added: llvm/branches/R600/test/ExecutionEngine/MCJIT/test-data-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/ExecutionEngine/MCJIT/test-data-align.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/ExecutionEngine/MCJIT/test-data-align.ll (added)
+++ llvm/branches/R600/test/ExecutionEngine/MCJIT/test-data-align.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,15 @@
+; RUN:  %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+
+; Check that a variable is always aligned as specified.
+
+@var = global i32 0, align 32
+define i32 @main() {
+  %addr = ptrtoint i32* @var to i64
+  %mask = and i64 %addr, 31
+  %tst = icmp eq i64 %mask, 0
+  br i1 %tst, label %good, label %bad
+good:
+  ret i32 0
+bad:
+  ret i32 1
+}

Added: llvm/branches/R600/test/Feature/minsize_attr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Feature/minsize_attr.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Feature/minsize_attr.ll (added)
+++ llvm/branches/R600/test/Feature/minsize_attr.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define void @test1() minsize {
+; CHECK: define void @test1() minsize
+        ret void
+}
+

Added: llvm/branches/R600/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll (added)
+++ llvm/branches/R600/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,19 @@
+; This test checks that we are not instrumenting globals
+; that we created ourselves.
+; RUN: opt < %s -asan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_Z3barv() uwtable address_safety {
+entry:
+  %a = alloca i32, align 4
+  call void @_Z3fooPi(i32* %a)
+  ret void
+}
+
+declare void @_Z3fooPi(i32*)
+; We create one global string constant for the stack frame above.
+; Make sure we don't create any other global constants.
+; CHECK: = private constant
+; CHECK-NOT: = private constant

Modified: llvm/branches/R600/test/MC/ARM/basic-thumb-instructions.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/ARM/basic-thumb-instructions.s?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/ARM/basic-thumb-instructions.s (original)
+++ llvm/branches/R600/test/MC/ARM/basic-thumb-instructions.s Tue Nov 13 09:21:47 2012
@@ -259,8 +259,8 @@
 
 @ CHECK: ldr	r1, _foo                @ encoding: [A,0x49]
              @   fixup A - offset: 0, value: _foo, kind: fixup_arm_thumb_cp
-@ CHECK: ldr     r3, #604                @ encoding: [0x97,0x4b]
-@ CHECK: ldr     r3, #368                @ encoding: [0x5c,0x4b]
+@ CHECK: ldr     r3, [pc, #604]         @ encoding: [0x97,0x4b]
+@ CHECK: ldr     r3, [pc, #368]         @ encoding: [0x5c,0x4b]
 
 @------------------------------------------------------------------------------
 @ LDR (register)

Added: llvm/branches/R600/test/MC/ARM/thumb2-b.w-encodingT4.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/ARM/thumb2-b.w-encodingT4.s?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/ARM/thumb2-b.w-encodingT4.s (added)
+++ llvm/branches/R600/test/MC/ARM/thumb2-b.w-encodingT4.s Tue Nov 13 09:21:47 2012
@@ -0,0 +1,12 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
+  .syntax unified
+  .globl _func
+.thumb_func _foo
+.space 0x37c6
+_foo:
+@------------------------------------------------------------------------------
+@ B (thumb2 b.w encoding T4) rdar://12585795
+@------------------------------------------------------------------------------
+        b.w   0x3680c
+
+@ CHECK: b.w	#223244                    @ encoding: [0x6d,0xf0,0x0c,0xb0]

Added: llvm/branches/R600/test/MC/Disassembler/ARM/marked-up-thumb.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/ARM/marked-up-thumb.txt?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/ARM/marked-up-thumb.txt (added)
+++ llvm/branches/R600/test/MC/Disassembler/ARM/marked-up-thumb.txt Tue Nov 13 09:21:47 2012
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -mdis < %s | FileCheck %s
+# CHECK: ldr  <reg:r4>, <mem:[pc, <imm:#32>]>
+0x08 0x4c
+# CHECK: push	{<reg:r1>, <reg:r2>, <reg:r7>}
+0x86 0xb4
+# CHECK: sub	<reg:sp>, <imm:#132>
+0xa1 0xb0

Modified: llvm/branches/R600/test/MC/Disassembler/ARM/thumb-printf.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/ARM/thumb-printf.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/ARM/thumb-printf.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/ARM/thumb-printf.txt Tue Nov 13 09:21:47 2012
@@ -7,17 +7,17 @@
 # CHECK-NEXT:	add	r3, sp, #20
 # CHECK-NEXT:	ldr	r5, [r3], #4
 # CHECK-NEXT:	str	r3, [sp]
-# CHECK-NEXT:	ldr	r3, #52
+# CHECK-NEXT:	ldr	r3, [pc, #52]
 # CHECK-NEXT:	add	r3, pc
 # CHECK-NEXT:	ldr	r0, [r3]
 # CHECK-NEXT:	ldr	r4, [r0]
-# CHECK-NEXT:	ldr	r0, #48
+# CHECK-NEXT:	ldr	r0, [pc, #48]
 # CHECK-NEXT:	add	r0, pc
 # CHECK-NEXT:	ldr	r0, [r0]
 # CHECK-NEXT:	ldr	r0, [r0]
 # CHECK-NEXT:	blx	#191548
 # CHECK-NEXT:	cbnz	r0, #6
-# CHECK-NEXT:	ldr	r1, #40
+# CHECK-NEXT:	ldr	r1, [pc, #40]
 # CHECK-NEXT:	add	r1, pc
 # CHECK-NEXT:	ldr	r1, [r1]
 # CHECK-NEXT:	b	#0

Modified: llvm/branches/R600/test/MC/Disassembler/ARM/thumb-tests.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/ARM/thumb-tests.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/ARM/thumb-tests.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/ARM/thumb-tests.txt Tue Nov 13 09:21:47 2012
@@ -30,7 +30,7 @@
 # CHECK:	ldm	r0!, {r1}
 0x02 0xc8
 
-# CHECK:	ldr	r5, #432
+# CHECK:	ldr	r5, [pc, #432]
 0x6c 0x4d
 
 # CHECK:	str	r0, [r3]

Modified: llvm/branches/R600/test/MC/Disassembler/ARM/thumb1.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/ARM/thumb1.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/ARM/thumb1.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/ARM/thumb1.txt Tue Nov 13 09:21:47 2012
@@ -160,6 +160,7 @@
 # CHECK: ldr r1, [sp]
 # CHECK: ldr r2, [sp, #24]
 # CHECK: ldr r3, [sp, #1020]
+# CHECK: ldr r1, [pc, #12]
 
 
 0x29 0x68
@@ -168,6 +169,7 @@
 0x00 0x99
 0x06 0x9a
 0xff 0x9b
+0x03 0x49
 
 #------------------------------------------------------------------------------
 # LDR (register)

Modified: llvm/branches/R600/test/MC/Disassembler/ARM/thumb2.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/ARM/thumb2.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/ARM/thumb2.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/ARM/thumb2.txt Tue Nov 13 09:21:47 2012
@@ -169,6 +169,9 @@
 
 0x13 0xf5 0xce 0xa9
 
+# CHECK: b.w   #208962
+
+0x33 0xf0 0x21 0xb8 # rdar://12585795
 
 #------------------------------------------------------------------------------
 # BFC

Modified: llvm/branches/R600/test/MC/Disassembler/Mips/mips64.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/Mips/mips64.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/Mips/mips64.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/Mips/mips64.txt Tue Nov 13 09:21:47 2012
@@ -3,7 +3,7 @@
 # CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
 0x00 0x2b 0xd0 0x2d
 
 # CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@
 # CHECK: dsllv $gp, $27, $24
 0x03 0x1b 0xe0 0x14
 
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
 0x00 0x01 0x0f 0xbb
 
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
 0x03 0xc1 0x08 0x17
 
 # CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@
 # CHECK: dsubu $gp, $27, $24
 0x03 0x78 0xe0 0x2f
 
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
 0x8c 0x3b 0xc4 0xcd
 
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
 0x3c 0x01 0x00 0x01
 
 # CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@
 # CHECK: lui $ra, 1
 0x3c 0x1f 0x00 0x01
 
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
 0xac 0x3a 0xc4 0xc9
 
 # CHECK: ld $26, 3958($zero)

Modified: llvm/branches/R600/test/MC/Disassembler/Mips/mips64_le.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/Mips/mips64_le.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/Mips/mips64_le.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/Mips/mips64_le.txt Tue Nov 13 09:21:47 2012
@@ -3,7 +3,7 @@
 # CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
 0x2d 0xd0 0x2b 0x00
 
 # CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@
 # CHECK: dsllv $gp, $27, $24
 0x14 0xe0 0x1b 0x03
 
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
 0xbb 0x0f 0x01 0x00
 
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
 0x17 0x08 0xc1 0x03
 
 # CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@
 # CHECK: dsubu $gp, $27, $24
 0x2f 0xe0 0x78 0x03
 
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
 0xcd 0xc4 0x3b 0x8c
 
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
 0x01 0x00 0x01 0x3c
 
 # CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@
 # CHECK: lui $ra, 1
 0x01 0x00 0x1f 0x3c
 
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
 0xc9 0xc4 0x3a 0xac
 
 # CHECK: ld $26, 3958($zero)

Modified: llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2.txt Tue Nov 13 09:21:47 2012
@@ -3,7 +3,7 @@
 # CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
 0x00 0x2b 0xd0 0x2d
 
 # CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@
 # CHECK: dsllv $gp, $27, $24
 0x03 0x1b 0xe0 0x14
 
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
 0x00 0x01 0x0f 0xbb
 
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
 0x03 0xc1 0x08 0x17
 
 # CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@
 # CHECK: dsubu $gp, $27, $24
 0x03 0x78 0xe0 0x2f
 
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
 0x8c 0x3b 0xc4 0xcd
 
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
 0x3c 0x01 0x00 0x01
 
 # CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@
 # CHECK: lui $ra, 1
 0x3c 0x1f 0x00 0x01
 
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
 0xac 0x3a 0xc4 0xc9
 
 # CHECK: ld $26, 3958($zero)

Modified: llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2_le.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2_le.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2_le.txt (original)
+++ llvm/branches/R600/test/MC/Disassembler/Mips/mips64r2_le.txt Tue Nov 13 09:21:47 2012
@@ -3,7 +3,7 @@
 # CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
 0x2d 0xd0 0x2b 0x00
 
 # CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@
 # CHECK: dsllv $gp, $27, $24
 0x14 0xe0 0x1b 0x03
 
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
 0xbb 0x0f 0x01 0x00
 
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
 0x17 0x08 0xc1 0x03
 
 # CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@
 # CHECK: dsubu $gp, $27, $24
 0x2f 0xe0 0x78 0x03
 
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
 0xcd 0xc4 0x3b 0x8c
 
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
 0x01 0x00 0x01 0x3c
 
 # CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@
 # CHECK: lui $ra, 1
 0x01 0x00 0x1f 0x3c
 
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
 0xc9 0xc4 0x3a 0xac
 
 # CHECK: ld $26, 3958($zero)

Added: llvm/branches/R600/test/MC/Disassembler/X86/marked-up.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Disassembler/X86/marked-up.txt?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/Disassembler/X86/marked-up.txt (added)
+++ llvm/branches/R600/test/MC/Disassembler/X86/marked-up.txt Tue Nov 13 09:21:47 2012
@@ -0,0 +1,6 @@
+# RUN: llvm-mc --mdis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
+
+# CHECK: movq	<mem:<reg:%gs>:8>, <reg:%rcx>
+0x65 0x48 0x8b 0x0c 0x25 0x08 0x00 0x00 0x00
+# CHECK: xorps	<reg:%xmm1>, <reg:%xmm2>
+0x0f 0x57 0xd1

Added: llvm/branches/R600/test/MC/MachO/gen-dwarf-cpp.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/MachO/gen-dwarf-cpp.s?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/MachO/gen-dwarf-cpp.s (added)
+++ llvm/branches/R600/test/MC/MachO/gen-dwarf-cpp.s Tue Nov 13 09:21:47 2012
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
+// RUN: llvm-dwarfdump %t | FileCheck %s
+
+# 100 "t.s" 1
+.globl _bar
+_bar:
+	movl	$0, %eax
+L1:	leave
+	ret
+
+// rdar://9275556
+
+// We check that the source name "t.s" is picked up
+// CHECK:                 Dir  Mod Time   File Len   File Name
+// CHECK:                 ---- ---------- ---------- ---------------------------
+// CHECK: file_names[  1]    1 0x00000000 0x00000000 gen-dwarf-cpp.s
+// CHECK: file_names[  2]    0 0x00000000 0x00000000 t.s
+
+// We check that the source line number 100 is picked up before the "movl"
+// CHECK: Address            Line   Column File   ISA Flags
+// CHECK: ------------------ ------ ------ ------ --- -------------
+// CHECK: 0x0000000000000000    102      0      2   0  is_stmt

Added: llvm/branches/R600/test/MC/MachO/gen-dwarf-macro-cpp.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/MachO/gen-dwarf-macro-cpp.s?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/MachO/gen-dwarf-macro-cpp.s (added)
+++ llvm/branches/R600/test/MC/MachO/gen-dwarf-macro-cpp.s Tue Nov 13 09:21:47 2012
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
+// RUN: llvm-dwarfdump %t | FileCheck %s
+
+# 1 "foo.S" 2
+.macro switcher
+        ljmp *0x38(%ecx)
+.endmacro
+        switcher NaClSwitchNoSSE, 0
+
+// PR14264 was a crash in the code caused by the .macro not being handled correctly
+// rdar://12637628
+
+// We check that the source name "foo.S" is picked up
+// CHECK:                 Dir  Mod Time   File Len   File Name
+// CHECK:                 ---- ---------- ---------- ---------------------------
+// CHECK: file_names[  1]    1 0x00000000 0x00000000 gen-dwarf-macro-cpp.s
+// CHECK: file_names[  2]    0 0x00000000 0x00000000 foo.S

Added: llvm/branches/R600/test/MC/Markup/basic-markup.mc
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Markup/basic-markup.mc?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/Markup/basic-markup.mc (added)
+++ llvm/branches/R600/test/MC/Markup/basic-markup.mc Tue Nov 13 09:21:47 2012
@@ -0,0 +1,16 @@
+// RUN: llvm-mcmarkup %s | FileCheck %s
+
+	push	{<reg:r1>, <reg:r2>, <reg:r7>}
+	sub	<reg:sp>, <imm:#132>
+	ldr	<reg:r0>, <mem:[<reg:r0>, <imm:#4>]>
+
+
+// CHECK: reg
+// CHECK: reg
+// CHECK: reg
+// CHECK: reg
+// CHECK: imm
+// CHECK: reg
+// CHECK: mem
+// CHECK: reg
+// CHECK: imm

Added: llvm/branches/R600/test/MC/Markup/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Markup/lit.local.cfg?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/Markup/lit.local.cfg (added)
+++ llvm/branches/R600/test/MC/Markup/lit.local.cfg Tue Nov 13 09:21:47 2012
@@ -0,0 +1,2 @@
+config.suffixes = ['.mc']
+

Modified: llvm/branches/R600/test/MC/Mips/sext_64_32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/Mips/sext_64_32.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/Mips/sext_64_32.ll (original)
+++ llvm/branches/R600/test/MC/Mips/sext_64_32.ll Tue Nov 13 09:21:47 2012
@@ -2,7 +2,7 @@
 
 ; Sign extend from 32 to 64 was creating nonsense opcodes
 
-; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0
+; CHECK: sll ${{[a-z0-9]+}}, ${{[a-z0-9]+}}, 0
 
 define i64 @foo(i32 %ival) nounwind readnone {
 entry:
@@ -10,7 +10,7 @@
   ret i64 %conv
 }
 
-; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 0
+; CHECK: dsll32 ${{[a-z0-9]+}}, ${{[a-z0-9]+}}, 0
 
 define i64 @foo_2(i32 %ival_2) nounwind readnone {
 entry:

Added: llvm/branches/R600/test/MC/PowerPC/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/PowerPC/lit.local.cfg?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/PowerPC/lit.local.cfg (added)
+++ llvm/branches/R600/test/MC/PowerPC/lit.local.cfg Tue Nov 13 09:21:47 2012
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True

Added: llvm/branches/R600/test/MC/PowerPC/ppc64-relocs-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/PowerPC/ppc64-relocs-01.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/PowerPC/ppc64-relocs-01.ll (added)
+++ llvm/branches/R600/test/MC/PowerPC/ppc64-relocs-01.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,66 @@
+;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3  \
+;; RUN:  -filetype=obj %s -o - | \
+;; RUN: elf-dump --dump-section-data | FileCheck %s
+
+;; FIXME: this file needs to be in .s form; change when the asm parser is done.
+
+ at number64 = global i64 10, align 8
+
+define i64 @access_int64(i64 %a) nounwind readonly {
+entry:
+  %0 = load i64* @number64, align 8
+  %cmp = icmp eq i64 %0, %a
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+declare double @sin(double) nounwind
+
+define double @test_branch24 (double %x) nounwind readonly {
+entry:
+  %add = call double @sin(double %x) nounwind
+  ret double %add
+}
+
+;; The relocations in .rela.text are the 'number64' load using a
+;; R_PPC64_TOC16_DS against the .toc and the 'sin' external function
+;; address using a R_PPC64_REL24
+;; CHECK:       '.rela.text'
+;; CHECK:       Relocation 0
+;; CHECK-NEXT:  'r_offset',
+;; CHECK-NEXT:  'r_sym', 0x00000006
+;; CHECK-NEXT:  'r_type', 0x0000003f
+;; CHECK:       Relocation 1
+;; CHECK-NEXT:  'r_offset',
+;; CHECK-NEXT:  'r_sym', 0x0000000a
+;; CHECK-NEXT:  'r_type', 0x0000000a
+
+;; The .opd entry for the 'access_int64' function creates 2 relocations:
+;; 1. A R_PPC64_ADDR64 against the .text segment plus addend (the function
+;;    address itself);
+;; 2. And a R_PPC64_TOC against no symbol (the linker will replace it with
+;;    the module's TOC base).
+;; CHECK:       '.rela.opd'
+;; CHECK:       Relocation 0
+;; CHECK-NEXT:  'r_offset',
+;; CHECK-NEXT:  'r_sym', 0x00000002
+;; CHECK-NEXT:  'r_type', 0x00000026
+;; CHECK:       Relocation 1
+;; CHECK-NEXT:  'r_offset',
+;; CHECK-NEXT:  'r_sym', 0x00000000
+;; CHECK-NEXT:  'r_type', 0x00000033
+
+;; Finally the TOC creates the relocation for the 'number64'.
+;; CHECK:       '.rela.toc'
+;; CHECK:       Relocation 0
+;; CHECK-NEXT:  'r_offset',
+;; CHECK-NEXT:  'r_sym', 0x00000008
+;; CHECK-NEXT:  'r_type', 0x00000026
+
+;; Check if the relocation references are for correct symbols.
+;; CHECK:       Symbol 7
+;; CHECK-NEXT:  'access_int64'
+;; CHECK:       Symbol 8
+;; CHECK-NEXT:  'number64'
+;; CHECK:       Symbol 10
+;; CHECK-NEXT:  'sin'

Added: llvm/branches/R600/test/MC/X86/x86-32-ms-inline-asm.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/X86/x86-32-ms-inline-asm.s?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/X86/x86-32-ms-inline-asm.s (added)
+++ llvm/branches/R600/test/MC/X86/x86-32-ms-inline-asm.s Tue Nov 13 09:21:47 2012
@@ -0,0 +1,60 @@
+// RUN: llvm-mc -x86-asm-syntax=intel -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+mov eax, [ebx].0
+mov [ebx].4, ecx
+
+// CHECK: movl (%ebx), %eax
+// CHECK: encoding: [0x8b,0x03]
+// CHECK: movl %ecx, 4(%ebx)
+// CHECK: encoding: [0x89,0x4b,0x04]
+        
+_t21:                                   ## @t21
+// CHECK: t21
+	mov eax, [4*eax + 4]
+// CHECK: movl 4(,%eax,4), %eax
+// CHECK: # encoding: [0x8b,0x04,0x85,0x04,0x00,0x00,0x00]
+    mov eax, [4*eax][4]
+// CHECK: movl 4(,%eax,4), %eax
+// CHECK: # encoding: [0x8b,0x04,0x85,0x04,0x00,0x00,0x00]
+        
+	mov eax, [esi + eax]
+// CHECK: movl (%esi,%eax), %eax
+// CHECK: # encoding: [0x8b,0x04,0x06]
+	mov eax, [esi][eax]
+// CHECK: movl (%esi,%eax), %eax
+// CHECK: # encoding: [0x8b,0x04,0x06]
+        
+	mov eax, [esi + 4*eax]
+// CHECK: movl (%esi,%eax,4), %eax
+// CHECK: # encoding: [0x8b,0x04,0x86]
+	mov eax, [esi][4*eax]
+// CHECK: movl (%esi,%eax,4), %eax
+// CHECK: # encoding: [0x8b,0x04,0x86]
+
+    mov eax, [esi + eax + 4]
+// CHECK: movl 4(%esi,%eax), %eax
+// CHECK: # encoding: [0x8b,0x44,0x06,0x04]
+	mov eax, [esi][eax + 4]
+// CHECK: movl 4(%esi,%eax), %eax
+// CHECK: # encoding: [0x8b,0x44,0x06,0x04]
+	mov eax, [esi + eax][4]
+// CHECK: movl 4(%esi,%eax), %eax
+// CHECK: # encoding: [0x8b,0x44,0x06,0x04]
+	mov eax, [esi][eax][4]
+// CHECK: movl 4(%esi,%eax), %eax
+// CHECK: # encoding: [0x8b,0x44,0x06,0x04]
+
+	mov eax, [esi + 2*eax + 4]
+// CHECK: movl 4(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x04]
+	mov eax, [esi][2*eax + 4]
+// CHECK: movl 4(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x04]
+	mov eax, [esi + 2*eax][4]
+// CHECK: movl 4(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x04]
+	mov eax, [esi][2*eax][4]
+// CHECK: movl 4(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x04]
+
+	ret

Added: llvm/branches/R600/test/Other/extract-alias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Other/extract-alias.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Other/extract-alias.ll (added)
+++ llvm/branches/R600/test/Other/extract-alias.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,49 @@
+; RUN: llvm-extract -func foo -S < %s | FileCheck %s
+; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s
+; RUN: llvm-extract -alias zeda0 -S < %s | FileCheck --check-prefix=ALIAS %s
+; RUN: llvm-extract -ralias .*bar -S < %s | FileCheck --check-prefix=ALIASRE %s
+
+; Both aliases should be converted to declarations
+; CHECK:      @zeda0 = external global i32
+; CHECK:      define i32* @foo() {
+; CHECK-NEXT:  call void @a0bar()
+; CHECK-NEXT:  ret i32* @zeda0
+; CHECK-NEXT: }
+; CHECK:      declare void @a0bar()
+
+; DELETE:      @zed = global i32 0
+; DELETE:      @zeda0 = alias i32* @zed
+; DELETE-NEXT: @a0foo = alias i32* ()* @foo
+; DELETE-NEXT: @a0a0bar = alias void ()* @a0bar
+; DELETE-NEXT: @a0bar = alias void ()* @bar
+; DELETE:      declare i32* @foo()
+; DELETE:      define void @bar() {
+; DELETE-NEXT:  %c = call i32* @foo()
+; DELETE-NEXT:  ret void
+; DELETE-NEXT: }
+
+; ALIAS: @zed = external global i32
+; ALIAS: @zeda0 = alias i32* @zed
+
+; ALIASRE: @a0a0bar = alias void ()* @a0bar
+; ALIASRE: @a0bar = alias void ()* @bar
+; ALIASRE: declare void @bar()
+
+ at zed = global i32 0
+ at zeda0 = alias i32* @zed
+
+ at a0foo = alias i32* ()* @foo
+
+define i32* @foo() {
+  call void @a0bar()
+  ret i32* @zeda0
+}
+
+ at a0a0bar = alias void ()* @a0bar
+
+ at a0bar = alias void ()* @bar
+
+define void @bar() {
+  %c = call i32* @foo()
+  ret void
+}

Added: llvm/branches/R600/test/Other/extract-weak-odr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Other/extract-weak-odr.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Other/extract-weak-odr.ll (added)
+++ llvm/branches/R600/test/Other/extract-weak-odr.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,23 @@
+; RUN: llvm-extract -func foo -S < %s | FileCheck %s
+; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s
+
+; Test that we don't convert weak_odr to external definitions.
+
+; CHECK:      @bar = external global i32
+; CHECK:      define weak_odr i32* @foo() {
+; CHECK-NEXT:  ret i32* @bar
+; CHECK-NEXT: }
+
+; DELETE: @bar = weak_odr global i32 42
+; DELETE: declare i32* @foo()
+
+ at bar = weak_odr global i32 42
+
+define weak_odr i32*  @foo() {
+  ret i32* @bar
+}
+
+define void @g() {
+  %c = call i32* @foo()
+  ret void
+}

Modified: llvm/branches/R600/test/Other/extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Other/extract.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Other/extract.ll (original)
+++ llvm/branches/R600/test/Other/extract.ll Tue Nov 13 09:21:47 2012
@@ -7,18 +7,19 @@
 ; llvm-extract uses lazy bitcode loading, so make sure it correctly reads
 ; from bitcode files in addition to assembly files.
 
-; CHECK: define void @foo() {
+; CHECK: define hidden void @foo() {
 ; CHECK:   ret void
 ; CHECK: }
 
-; The linkonce_odr linkage for foo() should be changed to external linkage.
-; DELETE: declare void @foo()
+; The private linkage for foo() should be changed to external linkage and
+; hidden visibility added.
+; DELETE: declare hidden void @foo()
 ; DELETE: define void @bar() {
 ; DELETE:   call void @foo()
 ; DELETE:   ret void
 ; DELETE: }
 
-define linkonce_odr void @foo() {
+define private void @foo() {
   ret void
 }
 define void @bar() {

Added: llvm/branches/R600/test/Other/link-opts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Other/link-opts.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Other/link-opts.ll (added)
+++ llvm/branches/R600/test/Other/link-opts.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,13 @@
+;RUN: opt -S -std-link-opts < %s | FileCheck %s
+; Simple test to check that -std-link-opts keeps only the main function.
+
+; CHECK-NOT: define
+; CHECK: define void @main
+; CHECK-NOT: define
+define void @main() {
+  ret void
+}
+
+define void @foo() {
+  ret void
+}

Added: llvm/branches/R600/test/Transforms/BBVectorize/X86/loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/X86/loop1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/X86/loop1.ll (added)
+++ llvm/branches/R600/test/Transforms/BBVectorize/X86/loop1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,53 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; The second check covers the use of alias analysis (with loop unrolling).
+
+define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
+entry:
+  br label %for.body
+; CHECK: @test1
+; CHECK-UNRL: @test1
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8
+  %mul = fmul double %0, %0
+  %mul3 = fmul double %0, %1
+  %add = fadd double %mul, %mul3
+  %add4 = fadd double %1, %1
+  %add5 = fadd double %add4, %0
+  %mul6 = fmul double %0, %add5
+  %add7 = fadd double %add, %mul6
+  %mul8 = fmul double %1, %1
+  %add9 = fadd double %0, %0
+  %add10 = fadd double %add9, %0
+  %mul11 = fmul double %mul8, %add10
+  %add12 = fadd double %add7, %mul11
+  %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+  store double %add12, double* %arrayidx14, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10
+  br i1 %exitcond, label %for.end, label %for.body
+; CHECK-NOT: <2 x double>
+; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
+; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
+; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
+; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
+; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
+; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
+; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
+; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
+; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
+; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
+; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
+; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
+
+for.end:                                          ; preds = %for.body
+  ret void
+}

Added: llvm/branches/R600/test/Transforms/BBVectorize/X86/simple-ldstr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/X86/simple-ldstr.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/X86/simple-ldstr.ll (added)
+++ llvm/branches/R600/test/Transforms/BBVectorize/X86/simple-ldstr.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,29 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Simple 3-pair chain with loads and stores
+define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+}
+

Added: llvm/branches/R600/test/Transforms/BBVectorize/X86/simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/X86/simple.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/X86/simple.ll (added)
+++ llvm/branches/R600/test/Transforms/BBVectorize/X86/simple.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,103 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1
+; CHECK-NOT: fmul <2 x double>
+; CHECK: ret double %R
+}
+
+; Basic chain
+define double @test1a(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%W1 = fadd double %Y1, %Z1
+	%W2 = fadd double %Y2, %Z2
+	%V1 = fadd double %W1, %Z1
+	%V2 = fadd double %W2, %Z2
+	%Q1 = fadd double %W1, %V1
+	%Q2 = fadd double %W2, %V2
+	%S1 = fadd double %W1, %Q1
+	%S2 = fadd double %W2, %Q2
+	%R  = fmul double %S1, %S2
+	ret double %R
+; CHECK: @test1a
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %W1 = fadd <2 x double> %Y1, %Z1
+; CHECK: %V1 = fadd <2 x double> %W1, %Z1
+; CHECK: %Q1 = fadd <2 x double> %W1, %V1
+; CHECK: %S1 = fadd <2 x double> %W1, %Q1
+; CHECK: %S1.v.r1 = extractelement <2 x double> %S1, i32 0
+; CHECK: %S1.v.r2 = extractelement <2 x double> %S1, i32 1
+; CHECK: %R = fmul double %S1.v.r1, %S1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair permuted)
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y1, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test2
+; CHECK-NOT: fmul <2 x double>
+; CHECK: ret double %R
+}
+
+; Basic depth-4 chain (internal permutation)
+define double @test4(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y1, %B2
+	%W1 = fadd double %Y2, %Z1
+	%W2 = fadd double %Y1, %Z2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test4
+; CHECK-NOT: fmul <2 x double>
+; CHECK: ret double %R
+}
+
+; Basic chain with shuffles
+define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
+	%X1 = sub <8 x i8> %A1, %B1
+	%X2 = sub <8 x i8> %A2, %B2
+	%Y1 = mul <8 x i8> %X1, %A1
+	%Y2 = mul <8 x i8> %X2, %A2
+	%Z1 = add <8 x i8> %Y1, %B1
+	%Z2 = add <8 x i8> %Y2, %B2
+        %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
+        %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
+	%R  = mul <8 x i8> %Q1, %Q2
+	ret <8 x i8> %R
+; CHECK: @test6
+; CHECK-NOT: sub <16 x i8>
+; CHECK: ret <8 x i8>
+}
+

Added: llvm/branches/R600/test/Transforms/BBVectorize/X86/vs-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/X86/vs-cast.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/X86/vs-cast.ll (added)
+++ llvm/branches/R600/test/Transforms/BBVectorize/X86/vs-cast.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,12 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+define void @main() nounwind uwtable {
+entry:
+  %0 = bitcast <2 x i64> undef to i128
+  %1 = bitcast <2 x i64> undef to i128
+  ret void
+; CHECK: @main
+}
+

Modified: llvm/branches/R600/test/Transforms/BBVectorize/cycle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/cycle.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/cycle.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/cycle.ll Tue Nov 13 09:21:47 2012
@@ -107,6 +107,6 @@
   ret void
 ; CHECK: @test1
 ; CHECK: go:
-; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
+; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
 ; FIXME: When tree pruning is deterministic, include the entire output.
 }

Modified: llvm/branches/R600/test/Transforms/BBVectorize/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/lit.local.cfg?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/lit.local.cfg (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/lit.local.cfg Tue Nov 13 09:21:47 2012
@@ -1 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+

Modified: llvm/branches/R600/test/Transforms/BBVectorize/loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/loop1.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/loop1.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/loop1.ll Tue Nov 13 09:21:47 2012
@@ -42,8 +42,8 @@
 ; CHECK: %mul = fmul double %0, %0
 ; CHECK: %mul3 = fmul double %0, %1
 ; CHECK: %add = fadd double %mul, %mul3
-; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
 ; CHECK: %mul8 = fmul double %1, %1
+; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
 ; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
 ; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
 ; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0

Modified: llvm/branches/R600/test/Transforms/BBVectorize/search-limit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/search-limit.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/search-limit.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/search-limit.ll Tue Nov 13 09:21:47 2012
@@ -7,8 +7,8 @@
 ; CHECK-SL4: @test1
 ; CHECK-SL4-NOT: <2 x double>
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 	%X1 = fsub double %A1, %B1
 	%X2 = fsub double %A2, %B2

Modified: llvm/branches/R600/test/Transforms/BBVectorize/simple-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/simple-int.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/simple-int.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/simple-int.ll Tue Nov 13 09:21:47 2012
@@ -17,8 +17,8 @@
 	ret double %R
 ; CHECK: @test1
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
 ; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
@@ -43,8 +43,8 @@
 	ret double %R
 ; CHECK: @test2
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
 ; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
@@ -68,8 +68,8 @@
 	ret double %R
 ; CHECK: @test3
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
 ; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)

Modified: llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll Tue Nov 13 09:21:47 2012
@@ -2,6 +2,9 @@
 ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
 ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
 
+; FIXME: re-enable this once pointer vectors work properly
+; XFAIL: *
+
 ; Simple 3-pair chain also with loads and stores (using ptrs and gep)
 define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
 entry:

Modified: llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/simple-ldstr.ll Tue Nov 13 09:21:47 2012
@@ -94,13 +94,13 @@
 ; CHECK-AO: @test3
 ; CHECK-AO: %i0 = load double* %a, align 8
 ; CHECK-AO: %i1 = load double* %b, align 8
-; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
-; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
 ; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1
 ; CHECK-AO: %i3 = load double* %arrayidx3, align 8
 ; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1
 ; CHECK-AO: %i4 = load double* %arrayidx4, align 8
+; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
 ; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
+; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
 ; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
 ; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
 ; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
@@ -108,3 +108,63 @@
 ; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
 ; CHECK-AO: ret void
 }
+
+; Simple 3-pair chain with loads and stores (unreachable)
+define void @test4(i1 %bool, double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  br i1 %bool, label %if.then1, label %if.end
+
+if.then1:
+  unreachable
+  br label %if.then
+
+if.then:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  br label %if.end
+
+if.end:
+  ret void
+; CHECK: @test4
+; CHECK-NOT: <2 x double>
+; CHECK-AO: @test4
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple 3-pair chain with loads and stores
+define void @test5(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  store double %mul, double* %c, align 4
+  ret void
+; CHECK: @test5
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4
+; CHECK: ret void
+; CHECK-AO: @test5
+; CHECK-AO-NOT: <2 x double>
+}
+

Modified: llvm/branches/R600/test/Transforms/BBVectorize/simple-sel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/simple-sel.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/simple-sel.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/simple-sel.ll Tue Nov 13 09:21:47 2012
@@ -6,8 +6,8 @@
 define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
 ; CHECK: @test1
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 	%X1 = fsub double %A1, %B1
 	%X2 = fsub double %A2, %B2
@@ -33,8 +33,8 @@
 ; CHECK: @test2
 ; CHECK-NB: @test2
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 	%X1 = fsub double %A1, %B1
 	%X2 = fsub double %A2, %B2

Modified: llvm/branches/R600/test/Transforms/BBVectorize/simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/BBVectorize/simple.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/BBVectorize/simple.ll (original)
+++ llvm/branches/R600/test/Transforms/BBVectorize/simple.ll Tue Nov 13 09:21:47 2012
@@ -5,8 +5,8 @@
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: @test1
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 	%X1 = fsub double %A1, %B1
 	%X2 = fsub double %A2, %B2
@@ -29,8 +29,8 @@
 define double @test2(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: @test2
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 	%X1 = fsub double %A1, %B1
 	%X2 = fsub double %A2, %B2
@@ -40,12 +40,13 @@
 ; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
 	%Z1 = fadd double %Y2, %B1
 	%Z2 = fadd double %Y1, %B2
-; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+; CHECK: %Z1.v.i1.1 = insertelement <2 x double> undef, double %B2, i32 0
+; CHECK: %Z1.v.i1.2 = insertelement <2 x double> %Z1.v.i1.1, double %B1, i32 1
+; CHECK: %Z2 = fadd <2 x double> %Y1, %Z1.v.i1.2
 	%R  = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: %Z2.v.r1 = extractelement <2 x double> %Z2, i32 0
+; CHECK: %Z2.v.r2 = extractelement <2 x double> %Z2, i32 1
+; CHECK: %R = fmul double %Z2.v.r2, %Z2.v.r1
 	ret double %R
 ; CHECK: ret double %R
 }
@@ -54,8 +55,8 @@
 define double @test3(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: @test3
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 	%X1 = fsub double %A1, %B1
 	%X2 = fsub double %A2, %B2
@@ -79,8 +80,8 @@
 define double @test4(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: @test4
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 	%X1 = fsub double %A1, %B1
 	%X2 = fsub double %A2, %B2
@@ -148,4 +149,27 @@
 ; CHECK: ret <8 x i8> %R
 }
 
+; Basic depth-3 chain (flipped order)
+define double @test7(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test7
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z2 = fadd double %Y2, %B2
+	%Z1 = fadd double %Y1, %B1
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
 

Modified: llvm/branches/R600/test/Transforms/GVN/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/GVN/crash.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/GVN/crash.ll (original)
+++ llvm/branches/R600/test/Transforms/GVN/crash.ll Tue Nov 13 09:21:47 2012
@@ -163,3 +163,39 @@
   ret i8 %1
 }
 
+
+; Test that a GEP in an unreachable block with the following form doesn't crash
+; GVN:
+;
+;    %x = gep %some.type %x, ...
+
+%struct.type = type { i64, i32, i32 }
+
+define fastcc void @func() nounwind uwtable ssp align 2 {
+entry:
+  br label %reachable.bb
+
+;; Unreachable code.
+
+unreachable.bb:
+  %gep.val = getelementptr inbounds %struct.type* %gep.val, i64 1
+  br i1 undef, label %u2.bb, label %u1.bb
+
+u1.bb:
+  %tmp1 = getelementptr inbounds %struct.type* %gep.val, i64 0, i32 0
+  store i64 -1, i64* %tmp1, align 8
+  br label %unreachable.bb
+
+u2.bb:
+  %0 = load i32* undef, align 4
+  %conv.i.i.i.i.i = zext i32 %0 to i64
+  br label %u2.bb
+
+;; Reachable code.
+
+reachable.bb:
+  br label %r1.bb
+
+r1.bb:
+  br label %u2.bb
+}

Added: llvm/branches/R600/test/Transforms/GVN/pr14166.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/GVN/pr14166.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/GVN/pr14166.ll (added)
+++ llvm/branches/R600/test/Transforms/GVN/pr14166.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,27 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+target datalayout = "e-p:32:32:32"
+target triple = "i386-pc-linux-gnu"
+define <2 x i32> @test1() {
+  %v1 = alloca <2 x i32>
+  call void @anything(<2 x i32>* %v1)
+  %v2 = load <2 x i32>* %v1
+  %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
+  %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
+  store <2 x i8*> %v3, <2 x i8*>* %v4
+  %v5 = load <2 x i32>* %v1
+  ret <2 x i32> %v5
+; CHECK: @test1
+; CHECK: %v1 = alloca <2 x i32>
+; CHECK: call void @anything(<2 x i32>* %v1)
+; CHECK: %v2 = load <2 x i32>* %v1
+; CHECK: %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
+; CHECK: %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
+; CHECK: store <2 x i8*> %v3, <2 x i8*>* %v4
+; CHECK: %1 = ptrtoint <2 x i8*> %v3 to <2 x i32>
+; CHECK: %2 = bitcast <2 x i32> %1 to i64
+; CHECK: %3 = bitcast i64 %2 to <2 x i32>
+; CHECK: ret <2 x i32> %3
+}
+
+declare void @anything(<2 x i32>*)
+

Added: llvm/branches/R600/test/Transforms/IndVarSimplify/verify-scev.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/IndVarSimplify/verify-scev.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/IndVarSimplify/verify-scev.ll (added)
+++ llvm/branches/R600/test/Transforms/IndVarSimplify/verify-scev.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,421 @@
+; RUN: opt < %s -S -indvars -verify-scev
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @test1() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 false, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  br i1 undef, label %for.end11, label %for.body3
+
+for.body3:                                        ; preds = %for.end
+  unreachable
+
+for.end11:                                        ; preds = %for.end
+  br i1 undef, label %while.body, label %while.end
+
+while.body:                                       ; preds = %for.end11
+  unreachable
+
+while.end:                                        ; preds = %for.end11
+  br i1 undef, label %if.end115, label %for.cond109
+
+for.cond109:                                      ; preds = %while.end
+  unreachable
+
+if.end115:                                        ; preds = %while.end
+  br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612
+
+while.body119.lr.ph.lr.ph:                        ; preds = %if.end115
+  br i1 undef, label %for.cond612, label %if.end123.us
+
+if.end123.us:                                     ; preds = %while.body119.lr.ph.lr.ph
+  br label %for.cond132.us
+
+for.cond132.us:                                   ; preds = %for.cond132.us, %if.end123.us
+  br i1 undef, label %if.then136.us, label %for.cond132.us
+
+if.then136.us:                                    ; preds = %for.cond132.us
+  br i1 undef, label %while.end220, label %while.body211
+
+while.body211:                                    ; preds = %while.body211, %if.then136.us
+  br i1 undef, label %while.end220, label %while.body211
+
+while.end220:                                     ; preds = %while.body211, %if.then136.us
+  br label %for.cond246.outer
+
+for.cond246.outer:                                ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220
+  br label %for.cond246
+
+for.cond246:                                      ; preds = %for.cond372.loopexit, %for.cond246.outer
+  br i1 undef, label %for.end562, label %if.end250
+
+if.end250:                                        ; preds = %for.cond246
+  br i1 undef, label %if.end256, label %for.end562
+
+if.end256:                                        ; preds = %if.end250
+  %cmp272 = icmp eq i32 undef, undef
+  br i1 %cmp272, label %if.then274, label %for.cond404.preheader
+
+for.cond404.preheader:                            ; preds = %if.end256
+  br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph
+
+for.body409.lr.ph:                                ; preds = %for.cond404.preheader
+  br label %for.body409
+
+if.then274:                                       ; preds = %if.end256
+  br i1 undef, label %for.cond246.outer, label %if.end309
+
+if.end309:                                        ; preds = %if.then274
+  br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.body361:                                      ; preds = %for.body361, %if.end309
+  br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.cond372.loopexit:                             ; preds = %for.body361, %if.end309
+  br i1 undef, label %for.cond394.preheader, label %for.cond246
+
+for.cond394.preheader:                            ; preds = %for.cond372.loopexit
+  br i1 undef, label %for.cond246.outer, label %for.body397
+
+for.body397:                                      ; preds = %for.cond394.preheader
+  unreachable
+
+for.body409:                                      ; preds = %for.inc558, %for.body409.lr.ph
+  %k.029 = phi i32 [ 1, %for.body409.lr.ph ], [ %inc559, %for.inc558 ]
+  br i1 undef, label %if.then412, label %if.else433
+
+if.then412:                                       ; preds = %for.body409
+  br label %if.end440
+
+if.else433:                                       ; preds = %for.body409
+  br label %if.end440
+
+if.end440:                                        ; preds = %if.else433, %if.then412
+  br i1 undef, label %for.inc558, label %if.end461
+
+if.end461:                                        ; preds = %if.end440
+  br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.body517:                                      ; preds = %for.body517, %if.end461
+  br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.cond528.loopexit:                             ; preds = %for.body517, %if.end461
+  br label %for.inc558
+
+for.inc558:                                       ; preds = %for.cond528.loopexit, %if.end440
+  %inc559 = add nsw i32 %k.029, 1
+  %cmp407 = icmp sgt i32 %inc559, undef
+  br i1 %cmp407, label %for.cond246.outer, label %for.body409
+
+for.end562:                                       ; preds = %if.end250, %for.cond246
+  unreachable
+
+for.cond612:                                      ; preds = %while.body119.lr.ph.lr.ph, %if.end115
+  unreachable
+}
+
+define void @test2() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  br i1 undef, label %for.end11, label %for.body3
+
+for.body3:                                        ; preds = %for.end
+  unreachable
+
+for.end11:                                        ; preds = %for.end
+  br i1 undef, label %while.body, label %while.end
+
+while.body:                                       ; preds = %for.end11
+  unreachable
+
+while.end:                                        ; preds = %for.end11
+  br i1 undef, label %if.end115, label %for.cond109
+
+for.cond109:                                      ; preds = %while.end
+  unreachable
+
+if.end115:                                        ; preds = %while.end
+  br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612
+
+while.body119.lr.ph.lr.ph:                        ; preds = %if.end115
+  br i1 undef, label %for.cond612, label %if.end123.us
+
+if.end123.us:                                     ; preds = %while.body119.lr.ph.lr.ph
+  br label %for.cond132.us
+
+for.cond132.us:                                   ; preds = %for.cond132.us, %if.end123.us
+  br i1 undef, label %if.then136.us, label %for.cond132.us
+
+if.then136.us:                                    ; preds = %for.cond132.us
+  br i1 undef, label %while.end220, label %while.body211
+
+while.body211:                                    ; preds = %while.body211, %if.then136.us
+  br i1 undef, label %while.end220, label %while.body211
+
+while.end220:                                     ; preds = %while.body211, %if.then136.us
+  br label %for.cond246.outer
+
+for.cond246.outer:                                ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220
+  br label %for.cond246
+
+for.cond246:                                      ; preds = %for.cond372.loopexit, %for.cond246.outer
+  br i1 undef, label %for.end562, label %if.end250
+
+if.end250:                                        ; preds = %for.cond246
+  br i1 undef, label %if.end256, label %for.end562
+
+if.end256:                                        ; preds = %if.end250
+  %0 = load i32* undef, align 4
+  br i1 undef, label %if.then274, label %for.cond404.preheader
+
+for.cond404.preheader:                            ; preds = %if.end256
+  %add406 = add i32 0, %0
+  br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph
+
+for.body409.lr.ph:                                ; preds = %for.cond404.preheader
+  br label %for.body409
+
+if.then274:                                       ; preds = %if.end256
+  br i1 undef, label %for.cond246.outer, label %if.end309
+
+if.end309:                                        ; preds = %if.then274
+  br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.body361:                                      ; preds = %for.body361, %if.end309
+  br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.cond372.loopexit:                             ; preds = %for.body361, %if.end309
+  br i1 undef, label %for.cond394.preheader, label %for.cond246
+
+for.cond394.preheader:                            ; preds = %for.cond372.loopexit
+  br i1 undef, label %for.cond246.outer, label %for.body397
+
+for.body397:                                      ; preds = %for.cond394.preheader
+  unreachable
+
+for.body409:                                      ; preds = %for.inc558, %for.body409.lr.ph
+  %k.029 = phi i32 [ 1, %for.body409.lr.ph ], [ %inc559, %for.inc558 ]
+  br i1 undef, label %if.then412, label %if.else433
+
+if.then412:                                       ; preds = %for.body409
+  br label %if.end440
+
+if.else433:                                       ; preds = %for.body409
+  br label %if.end440
+
+if.end440:                                        ; preds = %if.else433, %if.then412
+  br i1 undef, label %for.inc558, label %if.end461
+
+if.end461:                                        ; preds = %if.end440
+  br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.body517:                                      ; preds = %for.body517, %if.end461
+  br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.cond528.loopexit:                             ; preds = %for.body517, %if.end461
+  br label %for.inc558
+
+for.inc558:                                       ; preds = %for.cond528.loopexit, %if.end440
+  %inc559 = add nsw i32 %k.029, 1
+  %cmp407 = icmp sgt i32 %inc559, %add406
+  br i1 %cmp407, label %for.cond246.outer, label %for.body409
+
+for.end562:                                       ; preds = %if.end250, %for.cond246
+  unreachable
+
+for.cond612:                                      ; preds = %while.body119.lr.ph.lr.ph, %if.end115
+  unreachable
+}
+
+define void @test3() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  br i1 undef, label %for.end11, label %for.body3
+
+for.body3:                                        ; preds = %for.end
+  unreachable
+
+for.end11:                                        ; preds = %for.end
+  br i1 undef, label %while.body, label %while.end
+
+while.body:                                       ; preds = %for.end11
+  unreachable
+
+while.end:                                        ; preds = %for.end11
+  br i1 undef, label %if.end115, label %for.cond109
+
+for.cond109:                                      ; preds = %while.end
+  unreachable
+
+if.end115:                                        ; preds = %while.end
+  br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612
+
+while.body119.lr.ph.lr.ph:                        ; preds = %if.end115
+  br i1 undef, label %for.cond612, label %if.end123.us
+
+if.end123.us:                                     ; preds = %while.body119.lr.ph.lr.ph
+  br label %for.cond132.us
+
+for.cond132.us:                                   ; preds = %for.cond132.us, %if.end123.us
+  br i1 undef, label %if.then136.us, label %for.cond132.us
+
+if.then136.us:                                    ; preds = %for.cond132.us
+  br i1 undef, label %while.end220, label %while.body211
+
+while.body211:                                    ; preds = %while.body211, %if.then136.us
+  br i1 undef, label %while.end220, label %while.body211
+
+while.end220:                                     ; preds = %while.body211, %if.then136.us
+  br label %for.cond246.outer
+
+for.cond246.outer:                                ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220
+  br label %for.cond246
+
+for.cond246:                                      ; preds = %for.cond372.loopexit, %for.cond246.outer
+  br i1 undef, label %for.end562, label %if.end250
+
+if.end250:                                        ; preds = %for.cond246
+  br i1 undef, label %if.end256, label %for.end562
+
+if.end256:                                        ; preds = %if.end250
+  br i1 undef, label %if.then274, label %for.cond404.preheader
+
+for.cond404.preheader:                            ; preds = %if.end256
+  br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph
+
+for.body409.lr.ph:                                ; preds = %for.cond404.preheader
+  br label %for.body409
+
+if.then274:                                       ; preds = %if.end256
+  br i1 undef, label %for.cond246.outer, label %if.end309
+
+if.end309:                                        ; preds = %if.then274
+  br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.body361:                                      ; preds = %for.body361, %if.end309
+  br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.cond372.loopexit:                             ; preds = %for.body361, %if.end309
+  br i1 undef, label %for.cond394.preheader, label %for.cond246
+
+for.cond394.preheader:                            ; preds = %for.cond372.loopexit
+  br i1 undef, label %for.cond246.outer, label %for.body397
+
+for.body397:                                      ; preds = %for.cond394.preheader
+  unreachable
+
+for.body409:                                      ; preds = %for.inc558, %for.body409.lr.ph
+  br i1 undef, label %if.then412, label %if.else433
+
+if.then412:                                       ; preds = %for.body409
+  br label %if.end440
+
+if.else433:                                       ; preds = %for.body409
+  br label %if.end440
+
+if.end440:                                        ; preds = %if.else433, %if.then412
+  br i1 undef, label %for.inc558, label %if.end461
+
+if.end461:                                        ; preds = %if.end440
+  br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.body517:                                      ; preds = %for.body517, %if.end461
+  br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.cond528.loopexit:                             ; preds = %for.body517, %if.end461
+  br label %for.inc558
+
+for.inc558:                                       ; preds = %for.cond528.loopexit, %if.end440
+  br i1 undef, label %for.cond246.outer, label %for.body409
+
+for.end562:                                       ; preds = %if.end250, %for.cond246
+  unreachable
+
+for.cond612:                                      ; preds = %while.body119.lr.ph.lr.ph, %if.end115
+  unreachable
+}
+
+define void @test4() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %if.end8, label %if.else
+
+if.else:                                          ; preds = %entry
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %entry
+  br i1 undef, label %if.end26, label %if.else22
+
+if.else22:                                        ; preds = %if.end8
+  br label %if.end26
+
+if.end26:                                         ; preds = %if.else22, %if.end8
+  br i1 undef, label %if.end35, label %if.else31
+
+if.else31:                                        ; preds = %if.end26
+  br label %if.end35
+
+if.end35:                                         ; preds = %if.else31, %if.end26
+  br i1 undef, label %for.end226, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %if.end35
+  br label %for.body48
+
+for.body48:                                       ; preds = %for.inc221, %for.body.lr.ph
+  br i1 undef, label %for.inc221, label %for.body65.lr.ph
+
+for.body65.lr.ph:                                 ; preds = %for.body48
+  %0 = load i32* undef, align 4
+  br label %for.body65.us
+
+for.body65.us:                                    ; preds = %for.inc219.us, %for.body65.lr.ph
+  %k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
+  %idxprom66.us = sext i32 %k.09.us to i64
+  br i1 undef, label %for.inc219.us, label %if.end72.us
+
+if.end72.us:                                      ; preds = %for.body65.us
+  br i1 undef, label %if.end93.us, label %if.then76.us
+
+if.then76.us:                                     ; preds = %if.end72.us
+  br label %if.end93.us
+
+if.end93.us:                                      ; preds = %if.then76.us, %if.end72.us
+  br i1 undef, label %if.end110.us, label %for.inc219.us
+
+if.end110.us:                                     ; preds = %if.end93.us
+  br i1 undef, label %for.inc219.us, label %for.body142.us
+
+for.body142.us:                                   ; preds = %for.cond139.loopexit.us, %if.end110.us
+  br label %for.cond152.us
+
+for.cond152.us:                                   ; preds = %for.cond152.us, %for.body142.us
+  br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
+
+for.inc219.us:                                    ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
+  %inc.us = add nsw i32 %k.09.us, 1
+  %cmp64.us = icmp sgt i32 %inc.us, %0
+  br i1 %cmp64.us, label %for.inc221, label %for.body65.us
+
+for.cond139.loopexit.us:                          ; preds = %for.cond152.us
+  br i1 undef, label %for.inc219.us, label %for.body142.us
+
+for.inc221:                                       ; preds = %for.inc219.us, %for.body48
+  br label %for.body48
+
+for.end226:                                       ; preds = %if.end35
+  ret void
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,51 @@
+; RUN: opt < %s -instcombine -S
+
+; Make sure that we don't crash when optimizing vectors of pointers.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.hoge = type { double*, double*, double*, double** }
+
+define void @widget(%struct.hoge* nocapture %arg) nounwind uwtable ssp {
+bb:
+  %tmp = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 0
+  br i1 undef, label %bb1, label %bb17
+
+bb1:                                              ; preds = %bb
+  br i1 undef, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1
+  br label %bb17
+
+bb3:                                              ; preds = %bb1
+  %tmp4 = bitcast double** %tmp to <2 x double*>*
+  %tmp5 = load <2 x double*>* %tmp4, align 8
+  %tmp6 = ptrtoint <2 x double*> %tmp5 to <2 x i64>
+  %tmp7 = sub <2 x i64> zeroinitializer, %tmp6
+  %tmp8 = ashr exact <2 x i64> %tmp7, <i64 3, i64 3>
+  %tmp9 = extractelement <2 x i64> %tmp8, i32 0
+  %tmp10 = add nsw i64 undef, %tmp9
+  br i1 undef, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb3
+  br label %bb13
+
+bb12:                                             ; preds = %bb3
+  br label %bb13
+
+bb13:                                             ; preds = %bb12, %bb11
+  br i1 undef, label %bb16, label %bb14
+
+bb14:                                             ; preds = %bb13
+  br i1 undef, label %bb16, label %bb15
+
+bb15:                                             ; preds = %bb14
+  br label %bb16
+
+bb16:                                             ; preds = %bb15, %bb14, %bb13
+  unreachable
+
+bb17:                                             ; preds = %bb2, %bb
+  ret void
+}

Modified: llvm/branches/R600/test/Transforms/InstCombine/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/cast.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/cast.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/cast.ll Tue Nov 13 09:21:47 2012
@@ -694,3 +694,209 @@
 ; CHECK: @test67
 ; CHECK: ret i1 false
 }
+
+%s = type { i32, i32, i32 }
+
+define %s @test68(%s *%p, i64 %i) {
+; CHECK: @test68
+  %o = mul i64 %i, 12
+  %q = bitcast %s* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr %s*
+  %r = bitcast i8* %pp to %s*
+  %l = load %s* %r
+; CHECK-NEXT: load %s*
+  ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define double @test69(double *%p, i64 %i) {
+; CHECK: @test69
+  %o = shl nsw i64 %i, 3
+  %q = bitcast double* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr inbounds double*
+  %r = bitcast i8* %pp to double*
+  %l = load double* %r
+; CHECK-NEXT: load double*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
+define %s @test70(%s *%p, i64 %i) {
+; CHECK: @test70
+  %o = mul nsw i64 %i, 36
+; CHECK-NEXT: mul nsw i64 %i, 3
+  %q = bitcast %s* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr inbounds %s*
+  %r = bitcast i8* %pp to %s*
+  %l = load %s* %r
+; CHECK-NEXT: load %s*
+  ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define double @test71(double *%p, i64 %i) {
+; CHECK: @test71
+  %o = shl i64 %i, 5
+; CHECK-NEXT: shl i64 %i, 2
+  %q = bitcast double* %p to i8*
+  %pp = getelementptr i8* %q, i64 %o
+; CHECK-NEXT: getelementptr double*
+  %r = bitcast i8* %pp to double*
+  %l = load double* %r
+; CHECK-NEXT: load double*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test72(double *%p, i32 %i) {
+; CHECK: @test72
+  %so = mul nsw i32 %i, 8
+  %o = sext i32 %so to i64
+; CHECK-NEXT: sext i32 %i to i64
+  %q = bitcast double* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr inbounds double*
+  %r = bitcast i8* %pp to double*
+  %l = load double* %r
+; CHECK-NEXT: load double*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test73(double *%p, i128 %i) {
+; CHECK: @test73
+  %lo = mul nsw i128 %i, 8
+  %o = trunc i128 %lo to i64
+; CHECK-NEXT: trunc i128 %i to i64
+  %q = bitcast double* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr double*
+  %r = bitcast i8* %pp to double*
+  %l = load double* %r
+; CHECK-NEXT: load double*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test74(double *%p, i64 %i) {
+; CHECK: @test74
+  %q = bitcast double* %p to i64*
+  %pp = getelementptr inbounds i64* %q, i64 %i
+; CHECK-NEXT: getelementptr inbounds double*
+  %r = bitcast i64* %pp to double*
+  %l = load double* %r
+; CHECK-NEXT: load double*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
+define i32* @test75(i32* %p, i32 %x) {
+; CHECK: @test75
+  %y = shl i32 %x, 3
+; CHECK-NEXT: shl i32 %x, 3
+  %z = sext i32 %y to i64
+; CHECK-NEXT: sext i32 %y to i64
+  %q = bitcast i32* %p to i8*
+  %r = getelementptr i8* %q, i64 %z
+  %s = bitcast i8* %r to i32*
+  ret i32* %s
+}
+
+define %s @test76(%s *%p, i64 %i, i64 %j) {
+; CHECK: @test76
+  %o = mul i64 %i, 12
+  %o2 = mul nsw i64 %o, %j
+; CHECK-NEXT: %o2 = mul i64 %i, %j
+  %q = bitcast %s* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %o2
+; CHECK-NEXT: getelementptr %s* %p, i64 %o2
+  %r = bitcast i8* %pp to %s*
+  %l = load %s* %r
+; CHECK-NEXT: load %s*
+  ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define %s @test77(%s *%p, i64 %i, i64 %j) {
+; CHECK: @test77
+  %o = mul nsw i64 %i, 36
+  %o2 = mul nsw i64 %o, %j
+; CHECK-NEXT: %o = mul nsw i64 %i, 3
+; CHECK-NEXT: %o2 = mul nsw i64 %o, %j
+  %q = bitcast %s* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %o2
+; CHECK-NEXT: getelementptr inbounds %s* %p, i64 %o2
+  %r = bitcast i8* %pp to %s*
+  %l = load %s* %r
+; CHECK-NEXT: load %s*
+  ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) {
+; CHECK: @test78
+  %a = mul nsw i32 %k, 36
+; CHECK-NEXT: mul nsw i32 %k, 3
+  %b = mul nsw i32 %a, %l
+; CHECK-NEXT: mul nsw i32 %a, %l
+  %c = sext i32 %b to i128
+; CHECK-NEXT: sext i32 %b to i128
+  %d = mul nsw i128 %c, %m
+; CHECK-NEXT: mul nsw i128 %c, %m
+  %e = mul i128 %d, %n
+; CHECK-NEXT: mul i128 %d, %n
+  %f = trunc i128 %e to i64
+; CHECK-NEXT: trunc i128 %e to i64
+  %g = mul nsw i64 %f, %i
+; CHECK-NEXT: mul i64 %f, %i
+  %h = mul nsw i64 %g, %j
+; CHECK-NEXT: mul i64 %g, %j
+  %q = bitcast %s* %p to i8*
+  %pp = getelementptr inbounds i8* %q, i64 %h
+; CHECK-NEXT: getelementptr %s* %p, i64 %h
+  %r = bitcast i8* %pp to %s*
+  %load = load %s* %r
+; CHECK-NEXT: load %s*
+  ret %s %load
+; CHECK-NEXT: ret %s
+}
+
+define %s @test79(%s *%p, i64 %i, i32 %j) {
+; CHECK: @test79
+  %a = mul nsw i64 %i, 36
+; CHECK: mul nsw i64 %i, 36
+  %b = trunc i64 %a to i32
+  %c = mul i32 %b, %j
+  %q = bitcast %s* %p to i8*
+; CHECK: bitcast
+  %pp = getelementptr inbounds i8* %q, i32 %c
+  %r = bitcast i8* %pp to %s*
+  %l = load %s* %r
+  ret %s %l
+}
+
+define double @test80([100 x double]* %p, i32 %i) {
+; CHECK: @test80
+  %tmp = mul nsw i32 %i, 8
+; CHECK-NEXT: sext i32 %i to i64
+  %q = bitcast [100 x double]* %p to i8*
+  %pp = getelementptr i8* %q, i32 %tmp
+; CHECK-NEXT: getelementptr [100 x double]*
+  %r = bitcast i8* %pp to double*
+  %l = load double* %r
+; CHECK-NEXT: load double*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test81(double *%p, float %f) {
+  %i = fptosi float %f to i64
+  %q = bitcast double* %p to i8*
+  %pp = getelementptr i8* %q, i64 %i
+  %r = bitcast i8* %pp to double*
+  %l = load double* %r
+  ret double %l
+}

Modified: llvm/branches/R600/test/Transforms/InstCombine/fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/fcmp.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/fcmp.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/fcmp.ll Tue Nov 13 09:21:47 2012
@@ -54,9 +54,8 @@
   %ext = fpext float %x to ppc_fp128
   %cmp = fcmp ogt ppc_fp128 %ext, 0xM00000000000000000000000000000000
   ret i1 %cmp
-; Can't convert ppc_fp128
 ; CHECK: @test7
-; CHECK-NEXT: fpext float %x to ppc_fp128
+; CHECK-NEXT: fcmp ogt float %x, 0.000000e+00
 }
 
 define float @test8(float %x) nounwind readnone optsize ssp {

Added: llvm/branches/R600/test/Transforms/InstCombine/stpcpy-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/stpcpy-1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/stpcpy-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/stpcpy-1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,46 @@
+; Test that the stpcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @stpcpy(i8*, i8*)
+
+define i8* @test_simplify1() {
+; CHECK: @test_simplify1
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+  %ret = call i8* @stpcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NEXT: getelementptr inbounds ([32 x i8]* @a, i32 0, i32 5)
+  ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK: @test_simplify2
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+
+  %ret = call i8* @stpcpy(i8* %dst, i8* %dst)
+; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
+; CHECK-NEXT: getelementptr inbounds [32 x i8]* @a, i32 0, i32 [[LEN]]
+  ret i8* %ret
+}
+
+define i8* @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [32 x i8]* @b, i32 0, i32 0
+
+  %ret = call i8* @stpcpy(i8* %dst, i8* %src)
+; CHECK: call i8* @stpcpy
+  ret i8* %ret
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/stpcpy-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/stpcpy-2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/stpcpy-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/stpcpy-2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,22 @@
+; Test that the stpcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @stpcpy(i8*, i8*)
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+  call i16* @stpcpy(i8* %dst, i8* %src)
+; CHECK: call i16* @stpcpy
+  ret void
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,96 @@
+; Test lib call simplification of __stpcpy_chk calls with various values
+; for src, dst, and slen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+; Check cases where slen >= strlen (src).
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+  %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
+  ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+  %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12)
+  ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+  %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret void
+}
+
+; Check cases where there are no string constants.
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+  %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @stpcpy
+  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+  ret void
+}
+
+; Check case where the string length is not constant.
+
+define i8* @test_simplify5() {
+; CHECK: @test_simplify5
+  %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK: @__memcpy_chk
+  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
+; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
+  ret i8* %ret
+}
+
+; Check case where the source and destination are the same.
+
+define i8* @test_simplify6() {
+; CHECK: @test_simplify6
+  %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+
+; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
+; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]]
+  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
+  ret i8* %ret
+}
+
+; Check case where slen < strlen (src).
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__stpcpy_chk
+  call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8)
+  ret void
+}
+
+declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly

Added: llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/stpcpy_chk-2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __stpcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+  %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strcpy_chk
+  call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8)
+  ret void
+}
+
+declare i16* @__strcpy_chk(i16*, i8*, i32)

Added: llvm/branches/R600/test/Transforms/InstCombine/strlen-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strlen-1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strlen-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strlen-1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,97 @@
+; Test that the strlen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@nullstring = constant i8 0
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i32 @strlen(i8*)
+
+; Check strlen(string constant) -> integer constant.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+  %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %hello_l = call i32 @strlen(i8* %hello_p)
+  ret i32 %hello_l
+; CHECK-NEXT: ret i32 5
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+  %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %null_l = call i32 @strlen(i8* %null_p)
+  ret i32 %null_l
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+  %null_hello_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+  %null_hello_l = call i32 @strlen(i8* %null_hello_p)
+  ret i32 %null_hello_l
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+  %len = tail call i32 @strlen(i8* @nullstring) nounwind
+  ret i32 %len
+; CHECK-NEXT: ret i32 0
+}
+
+; Check strlen(x) == 0 --> *x == 0.
+
+define i1 @test_simplify5() {
+; CHECK: @test_simplify5
+  %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %hello_l = call i32 @strlen(i8* %hello_p)
+  %eq_hello = icmp eq i32 %hello_l, 0
+  ret i1 %eq_hello
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @test_simplify6() {
+; CHECK: @test_simplify6
+  %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %null_l = call i32 @strlen(i8* %null_p)
+  %eq_null = icmp eq i32 %null_l, 0
+  ret i1 %eq_null
+; CHECK-NEXT: ret i1 true
+}
+
+; Check strlen(x) != 0 --> *x != 0.
+
+define i1 @test_simplify7() {
+; CHECK: @test_simplify7
+  %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %hello_l = call i32 @strlen(i8* %hello_p)
+  %ne_hello = icmp ne i32 %hello_l, 0
+  ret i1 %ne_hello
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @test_simplify8() {
+; CHECK: @test_simplify8
+  %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %null_l = call i32 @strlen(i8* %null_p)
+  %ne_null = icmp ne i32 %null_l, 0
+  ret i1 %ne_null
+; CHECK-NEXT: ret i1 false
+}
+
+; Check cases that shouldn't be simplified.
+
+define i32 @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %a_p = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %a_l = call i32 @strlen(i8* %a_p)
+; CHECK-NEXT: %a_l = call i32 @strlen
+  ret i32 %a_l
+; CHECK-NEXT: ret i32 %a_l
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/strlen-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strlen-2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strlen-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strlen-2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,18 @@
+; Test that the strlen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+
+declare i32 @strlen(i8*, i32)
+
+define i32 @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %hello_l = call i32 @strlen(i8* %hello_p, i32 187)
+; CHECK-NEXT: %hello_l = call i32 @strlen
+  ret i32 %hello_l
+; CHECK-NEXT: ret i32 %hello_l
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/strncpy-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strncpy-1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strncpy-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strncpy-1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,95 @@
+; Test that the strncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strncpy(i8*, i8*, i32)
+declare i32 @puts(i8*)
+
+; Check a bunch of strncpy invocations together.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strncpy
+; CHECK: call i32 @puts
+  %target = alloca [1024 x i8]
+  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  store i8 0, i8* %arg1
+
+  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %rslt1 = call i8* @strncpy(i8* %arg1, i8* %arg2, i32 6)
+
+  %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %rslt2 = call i8* @strncpy(i8* %rslt1, i8* %arg3, i32 42)
+
+  %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+  %rslt3 = call i8* @strncpy(i8* %rslt2, i8* %arg4, i32 42)
+
+  call i32 @puts( i8* %rslt3 )
+  ret i32 0
+}
+
+; Check strncpy(x, "", y) -> memset(x, '\0', y, 1).
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+  call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK: call void @llvm.memset.p0i8.i32
+  ret void
+}
+
+; Check strncpy(x, y, 0) -> x.
+
+define i8* @test_simplify3() {
+; CHECK: @test_simplify3
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+  %ret = call i8* @strncpy(i8* %dst, i8* %src, i32 0)
+  ret i8* %ret
+; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0)
+}
+
+; Check  strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant].
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+  call i8* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
+  ret void
+}
+
+; Check cases that shouldn't be simplified.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [32 x i8]* @b, i32 0, i32 0
+
+  call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK: call i8* @strncpy
+  ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+  call i8* @strncpy(i8* %dst, i8* %src, i32 8)
+; CHECK: call i8* @strncpy
+  ret void
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/strncpy-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strncpy-2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strncpy-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strncpy-2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,22 @@
+; Test that the strncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strncpy(i8*, i8*, i32)
+
+; Check that 'strncpy' functions with the wrong prototype aren't simplified.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+  call i16* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK: call i16* @strncpy
+  ret void
+}

Modified: llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll Tue Nov 13 09:21:47 2012
@@ -7,27 +7,27 @@
 
 @a = common global [60 x i8] zeroinitializer, align 1
 @b = common global [60 x i8] zeroinitializer, align 1
-@.str = private constant [8 x i8] c"abcdefg\00"
+@.str = private constant [12 x i8] c"abcdefghijk\00"
 
 ; Check cases where dstlen >= len
 
 define void @test_simplify1() {
 ; CHECK: @test_simplify1
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
-  %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @strncpy
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
   ret void
 }
 
 define void @test_simplify2() {
 ; CHECK: @test_simplify2
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
-  %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call i8* @strncpy
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 8)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12)
   ret void
 }
 
@@ -37,7 +37,7 @@
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
 ; CHECK-NEXT: call i8* @strncpy
-  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60)
+  call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
   ret void
 }
 
@@ -46,7 +46,7 @@
 define void @test_no_simplify1() {
 ; CHECK: @test_no_simplify1
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
-  %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+  %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
 ; CHECK-NEXT: call i8* @__strncpy_chk
   call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4)

Added: llvm/branches/R600/test/Transforms/InstCombine/strpbrk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strpbrk-1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strpbrk-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strpbrk-1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,68 @@
+; Test that the strpbrk library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [12 x i8] c"hello world\00"
+@w = constant [2 x i8] c"w\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i8* @strpbrk(i8*, i8*)
+
+; Check strpbrk(s, "") -> NULL.
+
+define i8* @test_simplify1(i8* %str) {
+; CHECK: @test_simplify1
+  %pat = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+  %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+  ret i8* %ret
+; CHECK-NEXT: ret i8* null
+}
+
+; Check strpbrk("", s) -> NULL.
+
+define i8* @test_simplify2(i8* %pat) {
+; CHECK: @test_simplify2
+  %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+  %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+  ret i8* %ret
+; CHECK-NEXT: ret i8* null
+}
+
+; Check strpbrk(s1, s2), where s1 and s2 are constants.
+
+define i8* @test_simplify3() {
+; CHECK: @test_simplify3
+  %str = getelementptr [12 x i8]* @hello, i32 0, i32 0
+  %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
+
+  %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+  ret i8* %ret
+; CHECK-NEXT: ret i8* getelementptr inbounds ([12 x i8]* @hello, i32 0, i32 6)
+}
+
+; Check strpbrk(s, "a") -> strchr(s, 'a').
+
+define i8* @test_simplify4(i8* %str) {
+; CHECK: @test_simplify4
+  %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
+
+  %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: [[VAR:%[a-z]+]] = call i8* @strchr(i8* %str, i32 119)
+  ret i8* %ret
+; CHECK-NEXT: ret i8* [[VAR]]
+}
+
+; Check cases that shouldn't be simplified.
+
+define i8* @test_no_simplify1(i8* %str, i8* %pat) {
+; CHECK: @test_no_simplify1
+
+  %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+  ret i8* %ret
+; CHECK-NEXT: ret i8* %ret
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/strpbrk-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strpbrk-2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strpbrk-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strpbrk-2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,23 @@
+; Test that the strpbrk library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [12 x i8] c"hello world\00"
+@w = constant [2 x i8] c"w\00"
+
+declare i16* @strpbrk(i8*, i8*)
+
+; Check that 'strpbrk' functions with the wrong prototype aren't simplified.
+
+define i16* @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %str = getelementptr [12 x i8]* @hello, i32 0, i32 0
+  %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
+
+  %ret = call i16* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i16* @strpbrk
+  ret i16* %ret
+; CHECK-NEXT: ret i16* %ret
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/strto-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strto-1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strto-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strto-1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,82 @@
+; Test that the strto* library call simplifiers work correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i64 @strtol(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtol(i8*, i8**, i32)
+
+declare double @strtod(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare double @strtod(i8*, i8**, i32)
+
+declare float @strtof(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare float @strtof(i8*, i8**, i32)
+
+declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoul(i8*, i8**, i32)
+
+declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoll(i8*, i8**, i32)
+
+declare double @strtold(i8* %s, i8** %endptr)
+; CHECK: declare double @strtold(i8*, i8**)
+
+declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoull(i8*, i8**, i32)
+
+define void @test_simplify1(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify1
+  call i64 @strtol(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtol(i8* nocapture %x, i8** null, i32 10)
+  ret void
+}
+
+define void @test_simplify2(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify2
+  call double @strtod(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call double @strtod(i8* nocapture %x, i8** null, i32 10)
+  ret void
+}
+
+define void @test_simplify3(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify3
+  call float @strtof(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call float @strtof(i8* nocapture %x, i8** null, i32 10)
+  ret void
+}
+
+define void @test_simplify4(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify4
+  call i64 @strtoul(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoul(i8* nocapture %x, i8** null, i32 10)
+  ret void
+}
+
+define void @test_simplify5(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify5
+  call i64 @strtoll(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoll(i8* nocapture %x, i8** null, i32 10)
+  ret void
+}
+
+define void @test_simplify6(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify6
+  call double @strtold(i8* %x, i8** null)
+; CHECK-NEXT: call double @strtold(i8* nocapture %x, i8** null)
+  ret void
+}
+
+define void @test_simplify7(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify7
+  call i64 @strtoull(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoull(i8* nocapture %x, i8** null, i32 10)
+  ret void
+}
+
+define void @test_no_simplify1(i8* %x, i8** %endptr) {
+; CHECK: @test_no_simplify1
+  call i64 @strtol(i8* %x, i8** %endptr, i32 10)
+; CHECK-NEXT: call i64 @strtol(i8* %x, i8** %endptr, i32 10)
+  ret void
+}

Added: llvm/branches/R600/test/Transforms/InstCombine/vector_gep2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/vector_gep2.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/vector_gep2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/vector_gep2.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <2 x i8*> @testa(<2 x i8*> %a) {
+; CHECK: @testa
+  %g = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 1>
+; CHECK: getelementptr <2 x i8*> %a, <2 x i64> <i64 0, i64 1>
+  ret <2 x i8*> %g
+}

Modified: llvm/branches/R600/test/Transforms/Internalize/2008-05-09-AllButMain.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/Internalize/2008-05-09-AllButMain.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/Internalize/2008-05-09-AllButMain.ll (original)
+++ llvm/branches/R600/test/Transforms/Internalize/2008-05-09-AllButMain.ll Tue Nov 13 09:21:47 2012
@@ -1,27 +1,55 @@
-; No arguments means internalize all but main
-; RUN: opt < %s -internalize -S | grep internal | count 4
+; No arguments means internalize everything
+; RUN: opt < %s -internalize -S | FileCheck --check-prefix=NOARGS %s
+
 ; Internalize all but foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | grep internal | count 3
-; Non existent files should be treated as if they were empty (so internalize all but main)
-; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 4
-; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 3
+; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=LIST %s
+
+; Non existent files should be treated as if they were empty (so internalize
+; everything)
+; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=EMPTYFILE %s
+
+; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file  2> /dev/null | FileCheck --check-prefix=LIST2 %s
+
 ; -file and -list options should be merged, the .apifile contains foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | grep internal | count 2
+; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | FileCheck --check-prefix=MERGE %s
+
+; NOARGS: @i = internal global
+; LIST: @i = internal global
+; EMPTYFILE: @i = internal global
+; LIST2: @i = internal global
+; MERGE: @i = internal global
+@i = global i32 0
 
-@i = weak global i32 0          ; <i32*> [#uses=0]
-@j = weak global i32 0          ; <i32*> [#uses=0]
+; NOARGS: @j = internal global
+; LIST: @j = global
+; EMPTYFILE: @j = internal global
+; LIST2: @j = internal global
+; MERGE: @j = global
+@j = global i32 0
 
-define void @main(...) {
-entry:  
+; NOARGS: define internal void @main
+; LIST: define internal void @main
+; EMPTYFILE: define internal void @main
+; LIST2: define internal void @main
+; MERGE: define internal void @main
+define void @main() {
         ret void
 }
 
-define void @foo(...) {
-entry:  
+; NOARGS: define internal void @foo
+; LIST: define void @foo
+; EMPTYFILE: define internal void @foo
+; LIST2: define void @foo
+; MERGE: define void @foo
+define void @foo() {
         ret void
 }
 
-define void @bar(...) {
-entry:  
+; NOARGS: define internal void @bar
+; LIST: define internal void @bar
+; EMPTYFILE: define internal void @bar
+; LIST2: define void @bar
+; MERGE: define void @bar
+define void @bar() {
         ret void
 }

Modified: llvm/branches/R600/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll (original)
+++ llvm/branches/R600/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s -internalize -S | grep internal | count 3
+; RUN: opt < %s -internalize -internalize-public-api-list main -S | grep internal | count 3
 
 @A = global i32 0
 @B = alias i32* @A

Modified: llvm/branches/R600/test/Transforms/JumpThreading/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/JumpThreading/crash.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/JumpThreading/crash.ll (original)
+++ llvm/branches/R600/test/Transforms/JumpThreading/crash.ll Tue Nov 13 09:21:47 2012
@@ -511,3 +511,56 @@
 if.end:                                           ; preds = %for.cond
   ret void
 }
+
+define void @PR14233(i1 %cmp, i1 %cmp2, i1 %cmp3, i1 %cmp4) {
+entry:
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+  br label %if.end
+
+cond.false:
+  br label %if.end
+
+if.end:
+  %A = phi i64 [ 0, %cond.true ], [ 1, %cond.false ]
+  br i1 %cmp2, label %bb, label %if.end2
+
+bb:
+  br label %if.end2
+
+if.end2:
+  %B = phi i64 [ ptrtoint (i8* ()* @PR14233.f1 to i64), %bb ], [ %A, %if.end ]
+  %cmp.ptr = icmp eq i64 %B, ptrtoint (i8* ()* @PR14233.f2 to i64)
+  br i1 %cmp.ptr, label %cond.true2, label %if.end3
+
+cond.true2:
+  br i1 %cmp3, label %bb2, label %ur
+
+bb2:
+  br i1 %cmp4, label %if.end4, label %if.end3
+
+if.end4:
+  unreachable
+
+if.end3:
+  %cmp.ptr2 = icmp eq i64 %B, ptrtoint (i8* ()* @PR14233.f2 to i64)
+  br i1 %cmp.ptr2, label %ur, label %if.then601
+
+if.then601:
+  %C = icmp eq i64 %B, 0
+  br i1 %C, label %bb3, label %bb4
+
+bb3:
+  unreachable
+
+bb4:
+  unreachable
+
+ur:
+  unreachable
+}
+
+declare i8* @PR14233.f1()
+
+declare i8* @PR14233.f2()

Modified: llvm/branches/R600/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll (original)
+++ llvm/branches/R600/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm | lli
+; RUN: opt < %s -licm | lli %defaultjit
 
 define i32 @main() {
 entry:

Modified: llvm/branches/R600/test/Transforms/LoopIdiom/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopIdiom/basic.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopIdiom/basic.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopIdiom/basic.ll Tue Nov 13 09:21:47 2012
@@ -383,4 +383,37 @@
 
 }
 
+define void @PR14241(i32* %s, i64 %size) {
+; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
+; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
+; instead of a memmove. If we get the memmove transform back, this will catch
+; regressions.
+;
+; CHECK: @PR14241
 
+entry:
+  %end.idx = add i64 %size, -1
+  %end.ptr = getelementptr inbounds i32* %s, i64 %end.idx
+  br label %while.body
+; CHECK-NOT: memcpy
+;
+; FIXME: When we regain the ability to form a memmove here, this test should be
+; reversed and turned into a positive assertion.
+; CHECK-NOT: memmove
+
+while.body:
+  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
+  %src.ptr = getelementptr inbounds i32* %phi.ptr, i64 1
+  %val = load i32* %src.ptr, align 4
+; CHECK: load
+  %dst.ptr = getelementptr inbounds i32* %phi.ptr, i64 0
+  store i32 %val, i32* %dst.ptr, align 4
+; CHECK: store
+  %next.ptr = getelementptr inbounds i32* %phi.ptr, i64 1
+  %cmp = icmp eq i32* %next.ptr, %end.ptr
+  br i1 %cmp, label %exit, label %while.body
+
+exit:
+  ret void
+; CHECK: ret void
+}

Added: llvm/branches/R600/test/Transforms/LoopIdiom/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopIdiom/crash.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopIdiom/crash.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopIdiom/crash.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,25 @@
+; RUN: opt -basicaa -loop-idiom -S < %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Don't crash inside DependenceAnalysis
+; PR14219
+define void @test1(i64* %iwork, i64 %x)  {
+bb0:
+  %mul116 = mul nsw i64 %x, %x
+  %incdec.ptr6.sum175 = add i64 42, %x
+  %arrayidx135 = getelementptr inbounds i64* %iwork, i64 %incdec.ptr6.sum175
+  br label %bb1
+bb1:
+  %storemerge4226 = phi i64 [ 0, %bb0 ], [ %inc139, %bb1 ]
+  store i64 1, i64* %arrayidx135, align 8
+  %incdec.ptr6.sum176 = add i64 %mul116, %storemerge4226
+  %arrayidx137 = getelementptr inbounds i64* %iwork, i64 %incdec.ptr6.sum176
+  store i64 1, i64* %arrayidx137, align 8
+  %inc139 = add nsw i64 %storemerge4226, 1
+  %cmp131 = icmp sgt i64 %storemerge4226, 42
+  br i1 %cmp131, label %bb2, label %bb1
+bb2:
+  ret void
+}
+

Added: llvm/branches/R600/test/Transforms/LoopIdiom/scev-invalidation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopIdiom/scev-invalidation.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopIdiom/scev-invalidation.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopIdiom/scev-invalidation.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,74 @@
+; RUN: opt -S -indvars -loop-idiom < %s
+; PR14214
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @quote_arg() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %backslashes.0 = phi i32 [ undef, %entry ], [ %backslashes.2, %for.inc ]
+  %p.0 = phi i8* [ undef, %entry ], [ %incdec.ptr3, %for.inc ]
+  %q.0 = phi i8* [ undef, %entry ], [ %q.2, %for.inc ]
+  %0 = load i8* %p.0, align 1
+  switch i8 %0, label %while.cond.preheader [
+    i8 0, label %for.cond4.preheader
+    i8 92, label %for.inc
+  ]
+
+while.cond.preheader:                             ; preds = %for.cond
+  %tobool210 = icmp eq i32 %backslashes.0, 0
+  br i1 %tobool210, label %for.inc.loopexit, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %while.cond.preheader
+  %1 = add i32 %backslashes.0, -1
+  %2 = zext i32 %1 to i64
+  br label %while.body
+
+for.cond4.preheader:                              ; preds = %for.cond
+  %tobool57 = icmp eq i32 %backslashes.0, 0
+  br i1 %tobool57, label %for.end10, label %for.body6.lr.ph
+
+for.body6.lr.ph:                                  ; preds = %for.cond4.preheader
+  br label %for.body6
+
+while.body:                                       ; preds = %while.body.lr.ph, %while.body
+  %q.112 = phi i8* [ %q.0, %while.body.lr.ph ], [ %incdec.ptr, %while.body ]
+  %backslashes.111 = phi i32 [ %backslashes.0, %while.body.lr.ph ], [ %dec, %while.body ]
+  %incdec.ptr = getelementptr inbounds i8* %q.112, i64 1
+  store i8 92, i8* %incdec.ptr, align 1
+  %dec = add nsw i32 %backslashes.111, -1
+  %tobool2 = icmp eq i32 %dec, 0
+  br i1 %tobool2, label %while.cond.for.inc.loopexit_crit_edge, label %while.body
+
+while.cond.for.inc.loopexit_crit_edge:            ; preds = %while.body
+  %scevgep.sum = add i64 %2, 1
+  %scevgep13 = getelementptr i8* %q.0, i64 %scevgep.sum
+  br label %for.inc.loopexit
+
+for.inc.loopexit:                                 ; preds = %while.cond.for.inc.loopexit_crit_edge, %while.cond.preheader
+  %q.1.lcssa = phi i8* [ %scevgep13, %while.cond.for.inc.loopexit_crit_edge ], [ %q.0, %while.cond.preheader ]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc.loopexit, %for.cond
+  %backslashes.2 = phi i32 [ %backslashes.0, %for.cond ], [ 0, %for.inc.loopexit ]
+  %q.2 = phi i8* [ %q.0, %for.cond ], [ %q.1.lcssa, %for.inc.loopexit ]
+  %incdec.ptr3 = getelementptr inbounds i8* %p.0, i64 1
+  br label %for.cond
+
+for.body6:                                        ; preds = %for.body6.lr.ph, %for.body6
+  %q.39 = phi i8* [ %q.0, %for.body6.lr.ph ], [ %incdec.ptr7, %for.body6 ]
+  %backslashes.38 = phi i32 [ %backslashes.0, %for.body6.lr.ph ], [ %dec9, %for.body6 ]
+  %incdec.ptr7 = getelementptr inbounds i8* %q.39, i64 1
+  store i8 92, i8* %incdec.ptr7, align 1
+  %dec9 = add nsw i32 %backslashes.38, -1
+  %tobool5 = icmp eq i32 %dec9, 0
+  br i1 %tobool5, label %for.cond4.for.end10_crit_edge, label %for.body6
+
+for.cond4.for.end10_crit_edge:                    ; preds = %for.body6
+  br label %for.end10
+
+for.end10:                                        ; preds = %for.cond4.for.end10_crit_edge, %for.cond4.preheader
+  ret i32 undef
+}

Added: llvm/branches/R600/test/Transforms/LoopUnroll/pr14167.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopUnroll/pr14167.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopUnroll/pr14167.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopUnroll/pr14167.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @test1() nounwind {
+; Ensure that we don't crash when the trip count == -1.
+; CHECK: @test1
+entry:
+  br label %for.cond2.preheader
+
+for.cond2.preheader:                              ; preds = %for.end, %entry
+  br i1 false, label %middle.block, label %vector.ph
+
+vector.ph:                                        ; preds = %for.cond2.preheader
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  br i1 undef, label %middle.block.loopexit, label %vector.body
+
+middle.block.loopexit:                            ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.loopexit, %for.cond2.preheader
+  br i1 true, label %for.end, label %scalar.preheader
+
+scalar.preheader:                                 ; preds = %middle.block
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body4, %scalar.preheader
+  %indvars.iv = phi i64 [ 16000, %scalar.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 16000
+  br i1 %exitcond, label %for.body4, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body4
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %middle.block
+  br i1 undef, label %for.cond2.preheader, label %for.end15
+
+for.end15:                                        ; preds = %for.end
+  ret void
+}

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-20-infloop.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-20-infloop.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-20-infloop.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
 
 ; Check that we don't fall into an infinite loop.
 define void @test() nounwind {

Added: llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,57 @@
+; RUN: opt < %s  -loop-vectorize -dce -force-vector-width=4 
+
+; Check that we don't crash.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.6.3 LLVM: 3.2svn\22"
+
+@b = common global [32000 x float] zeroinitializer, align 16
+
+define i32 @set1ds(i32 %_n, float* nocapture %arr, float %value, i32 %stride) nounwind uwtable {
+entry:
+  %0 = icmp sgt i32 %_n, 0
+  br i1 %0, label %"3.lr.ph", label %"5"
+
+"3.lr.ph":                                        ; preds = %entry
+  %1 = bitcast float* %arr to i8*
+  %2 = sext i32 %stride to i64
+  br label %"3"
+
+"3":                                              ; preds = %"3.lr.ph", %"3"
+  %indvars.iv = phi i64 [ 0, %"3.lr.ph" ], [ %indvars.iv.next, %"3" ]
+  %3 = shl nsw i64 %indvars.iv, 2
+  %4 = getelementptr inbounds i8* %1, i64 %3
+  %5 = bitcast i8* %4 to float*
+  store float %value, float* %5, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, %2
+  %6 = trunc i64 %indvars.iv.next to i32
+  %7 = icmp slt i32 %6, %_n
+  br i1 %7, label %"3", label %"5"
+
+"5":                                              ; preds = %"3", %entry
+  ret i32 0
+}
+
+define i32 @init(i8* nocapture %name) unnamed_addr nounwind uwtable {
+entry:
+  br label %"3"
+
+"3":                                              ; preds = %"3", %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"3" ]
+  %0 = shl nsw i64 %indvars.iv, 2
+  %1 = getelementptr inbounds i8* bitcast (float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 16000) to i8*), i64 %0
+  %2 = bitcast i8* %1 to float*
+  store float -1.000000e+00, float* %2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 16000
+  br i1 %exitcond, label %"5", label %"3"
+
+"5":                                              ; preds = %"3"
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"alias set 7: float", metadata !1}
+!1 = metadata !{metadata !1}

Added: llvm/branches/R600/test/Transforms/LoopVectorize/X86/avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/X86/avx1.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/X86/avx1.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/X86/avx1.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,49 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @read_mod_write_single_ptr
+;CHECK: load <8 x float>
+;CHECK: ret i32
+define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds float* %a, i64 %indvars.iv
+  %3 = load float* %2, align 4
+  %4 = fmul float %3, 3.000000e+00
+  store float %4, float* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
+
+
+;CHECK: @read_mod_i64
+;CHECK: load <8 x i64>
+;CHECK: ret i32
+define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i64* %a, i64 %indvars.iv
+  %3 = load i64* %2, align 4
+  %4 = mul i64 %3, 3
+  store i64 %4, i64* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}

Added: llvm/branches/R600/test/Transforms/LoopVectorize/X86/conversion-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/X86/conversion-cost.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/X86/conversion-cost.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/X86/conversion-cost.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,48 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @conversion_cost1
+;CHECK: store <2 x i8>
+;CHECK: ret
+define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 3
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 3, %0 ]
+  %2 = trunc i64 %indvars.iv to i8
+  %3 = getelementptr inbounds i8* %A, i64 %indvars.iv
+  store i8 %2, i8* %3, align 1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
+
+;CHECK: @conversion_cost2
+;CHECK: <2 x float>
+;CHECK: ret
+define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 9
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
+  %2 = add nsw i64 %indvars.iv, 3
+  %3 = trunc i64 %2 to i32
+  %4 = sitofp i32 %3 to float
+  %5 = getelementptr inbounds float* %B, i64 %indvars.iv
+  store float %4, float* %5, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
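
For reference, the scalar loops these two conversion-cost tests are reduced from look roughly like the following C++ (a hypothetical reconstruction; only the guards and types visible in the IR above are assumed). The i64-to-i8 truncation feeding the store in the first loop and the sitofp of the induction variable in the second are what push the cost model toward the narrow vector factors checked above.

    // Hypothetical C++ shape of conversion_cost1/conversion_cost2.
    void conversion_cost1(int n, char *A) {
      for (long i = 3; i < n; ++i)
        A[i] = (char)i;              // trunc i64 -> i8 before the store
    }

    void conversion_cost2(int n, float *B) {
      for (long i = 9; i < n; ++i)
        B[i] = (float)(int)(i + 3);  // sitofp of the offset induction variable
    }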

Added: llvm/branches/R600/test/Transforms/LoopVectorize/X86/cost-model.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/X86/cost-model.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/X86/cost-model.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/X86/cost-model.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,38 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+ at c = common global [2048 x i32] zeroinitializer, align 16
+ at b = common global [2048 x i32] zeroinitializer, align 16
+ at d = common global [2048 x i32] zeroinitializer, align 16
+ at a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: cost_model_1
+;CHECK: <4 x i32>
+;CHECK: ret void
+define void @cost_model_1() nounwind uwtable noinline ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = shl nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0
+  %1 = load i32* %arrayidx, align 8
+  %idxprom1 = sext i32 %1 to i64
+  %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1
+  %2 = load i32* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
+  %3 = load i32* %arrayidx4, align 4
+  %idxprom5 = sext i32 %3 to i64
+  %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5
+  store i32 %2, i32* %arrayidx6, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
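
A rough C++ equivalent of cost_model_1 (hypothetical reconstruction, reusing the global array names from the test): the indirect indices loaded through @c and @d are the interesting part, and the test only checks that a <4 x i32> vector form is still produced.

    // Hypothetical source for cost_model_1; the arrays mirror the globals above.
    int a[2048], b[2048], c[2048], d[2048];

    void cost_model_1() {
      for (int i = 0; i < 256; ++i)
        a[d[i]] = b[c[2 * i]];   // indirect loads/stores driven by loaded indices
    }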

Added: llvm/branches/R600/test/Transforms/LoopVectorize/X86/gcc-examples.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/X86/gcc-examples.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/X86/gcc-examples.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/X86/gcc-examples.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,62 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+ at b = common global [2048 x i32] zeroinitializer, align 16
+ at c = common global [2048 x i32] zeroinitializer, align 16
+ at a = common global [2048 x i32] zeroinitializer, align 16
+
+; Select VF = 8;
+;CHECK: @example1
+;CHECK: load <8 x i32>
+;CHECK: add nsw <8 x i32>
+;CHECK: store <8 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+
+; Select VF=4 because sext <8 x i16> to <8 x i32> is expensive.
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+  %3 = load i16* %2, align 2
+  %4 = sext i16 %3 to i32
+  %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+  store i32 %4, i32* %5, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret void
+}
+
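
The two kernels above come from the classic GCC vectorizer examples; in C++ terms they are roughly the following (hypothetical reconstruction, showing only the parameters the reduced loops actually use).

    // example1: plain i32 add over globals; wide enough that VF = 8 pays off here.
    int a[2048], b[2048], c[2048];
    void example1() {
      for (int i = 0; i < 256; ++i)
        a[i] = b[i] + c[i];
    }

    // example10b: short -> int widening store; the sext is what limits VF to 4.
    void example10b(short *sb, int *ia) {
      for (int i = 0; i < 1024; ++i)
        ia[i] = sb[i];
    }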

Added: llvm/branches/R600/test/Transforms/LoopVectorize/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/X86/lit.local.cfg?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/X86/lit.local.cfg (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/X86/lit.local.cfg Tue Nov 13 09:21:47 2012
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+

Added: llvm/branches/R600/test/Transforms/LoopVectorize/cpp-new-array.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/cpp-new-array.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/cpp-new-array.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/cpp-new-array.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,46 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @cpp_new_arrays
+;CHECK: insertelement <4 x i32>
+;CHECK: load <4 x float>
+;CHECK: fadd <4 x float>
+;CHECK: ret i32
+define i32 @cpp_new_arrays() uwtable ssp {
+entry:
+  %call = call noalias i8* @_Znwm(i64 4)
+  %0 = bitcast i8* %call to float*
+  store float 1.000000e+03, float* %0, align 4
+  %call1 = call noalias i8* @_Znwm(i64 4)
+  %1 = bitcast i8* %call1 to float*
+  store float 1.000000e+03, float* %1, align 4
+  %call3 = call noalias i8* @_Znwm(i64 4)
+  %2 = bitcast i8* %call3 to float*
+  store float 1.000000e+03, float* %2, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = sext i32 %i.01 to i64
+  %arrayidx = getelementptr inbounds float* %0, i64 %idxprom
+  %3 = load float* %arrayidx, align 4
+  %idxprom5 = sext i32 %i.01 to i64
+  %arrayidx6 = getelementptr inbounds float* %1, i64 %idxprom5
+  %4 = load float* %arrayidx6, align 4
+  %add = fadd float %3, %4
+  %idxprom7 = sext i32 %i.01 to i64
+  %arrayidx8 = getelementptr inbounds float* %2, i64 %idxprom7
+  store float %add, float* %arrayidx8, align 4
+  %inc = add nsw i32 %i.01, 1
+  %cmp = icmp slt i32 %inc, 1000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %5 = load float* %2, align 4
+  %conv10 = fptosi float %5 to i32
+  ret i32 %conv10
+}
+
+declare noalias i8* @_Znwm(i64)
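
As the @_Znwm (operator new) declaration suggests, the test is reduced from C++ that loops over heap-allocated storage rather than globals or arguments; a hypothetical, simplified source of that shape (with full-size, well-defined allocations) would be:

    // Hypothetical shape of the original source: vectorize a loop whose
    // pointers come from operator new instead of globals or parameters.
    int cpp_new_arrays() {
      float *a = new float[1000];
      float *b = new float[1000];
      float *c = new float[1000];
      a[0] = b[0] = c[0] = 1000.0f;
      for (int i = 0; i < 1000; ++i)
        c[i] = a[i] + b[i];
      int r = (int)c[0];
      delete[] a; delete[] b; delete[] c;
      return r;
    }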

Added: llvm/branches/R600/test/Transforms/LoopVectorize/flags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/flags.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/flags.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/flags.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,53 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @flags1
+;CHECK: load <4 x i32>
+;CHECK: mul nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret i32
+define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 9
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = mul nsw i32 %3, 3
+  store i32 %4, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
+
+
+;CHECK: @flags2
+;CHECK: load <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret i32
+define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
+  %1 = icmp sgt i32 %n, 9
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = mul i32 %3, 3
+  store i32 %4, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/gcc-examples.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/gcc-examples.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/gcc-examples.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/gcc-examples.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -21,7 +21,7 @@
 
 ;CHECK: @example1
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
 define void @example1() nounwind uwtable ssp {
@@ -227,6 +227,8 @@
 }
 
 ;CHECK: @example10a
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: load <4 x i16>
 ;CHECK: add <4 x i16>
 ;CHECK: store <4 x i16>

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/increment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/increment.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/increment.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/increment.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -11,7 +11,7 @@
 ;  }
 ;CHECK: @inc
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
 define void @inc(i32 %n) nounwind uwtable noinline ssp {

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/induction_plus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/induction_plus.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/induction_plus.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/induction_plus.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/non-const-n.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/non-const-n.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/non-const-n.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/non-const-n.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -11,7 +11,7 @@
 ;CHECK: shl i32
 ;CHECK: zext i32
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
 define void @example1(i32 %n) nounwind uwtable ssp {

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/read-only.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/read-only.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/read-only.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/read-only.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/reduction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/reduction.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/reduction.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/reduction.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -66,7 +66,7 @@
 ;CHECK: @reduction_mix
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
-;CHECK: mul <4 x i32>
+;CHECK: mul nsw <4 x i32>
 ;CHECK: ret i32
 define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
   %1 = icmp sgt i32 %n, 0
@@ -149,4 +149,84 @@
   ret i32 %sum.0.lcssa
 }
 
+;CHECK: @reduction_and
+;CHECK: and <4 x i32>
+;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: ret i32
+define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %and = and i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_or
+;CHECK: or <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %or = or i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_xor
+;CHECK: xor <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %xor = xor i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
 
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
+  ret i32 %result.0.lcssa
+}
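
The three new tests add bitwise reduction kinds to the existing integer reductions; their scalar form is the loop below (hypothetical C++ reconstruction, shown for @reduction_and; the or/xor variants differ only in the accumulating operator and in starting from 0 instead of -1).

    // Scalar shape of reduction_and: accumulate with '&', identity value -1.
    int reduction_and(int n, const int *A, const int *B) {
      int result = -1;                 // all-ones identity for '&'
      for (int i = 0; i < n; ++i)
        result &= A[i] + B[i];
      return result;
    }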

Modified: llvm/branches/R600/test/Transforms/LoopVectorize/scalar-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/scalar-select.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/scalar-select.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/scalar-select.ll Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"

Copied: llvm/branches/R600/test/Transforms/LoopVectorize/small-loop.ll (from r167837, llvm/branches/R600/test/Transforms/LoopVectorize/non-const-n.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/small-loop.ll?p2=llvm/branches/R600/test/Transforms/LoopVectorize/small-loop.ll&p1=llvm/branches/R600/test/Transforms/LoopVectorize/non-const-n.ll&r1=167837&r2=167838&rev=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/non-const-n.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/small-loop.ll Tue Nov 13 09:21:47 2012
@@ -1,21 +1,16 @@
-; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
+ at a = common global [2048 x i32] zeroinitializer, align 16
 @b = common global [2048 x i32] zeroinitializer, align 16
 @c = common global [2048 x i32] zeroinitializer, align 16
- at a = common global [2048 x i32] zeroinitializer, align 16
 
 ;CHECK: @example1
-;CHECK: shl i32
-;CHECK: zext i32
-;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
-;CHECK: store <4 x i32>
+;CHECK-NOT: load <4 x i32>
 ;CHECK: ret void
-define void @example1(i32 %n) nounwind uwtable ssp {
-  %n4 = shl i32 %n, 2
+define void @example1() nounwind uwtable ssp {
   br label %1
 
 ; <label>:1                                       ; preds = %1, %0
@@ -29,7 +24,7 @@
   store i32 %6, i32* %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %n4
+  %exitcond = icmp eq i32 %lftr.wideiv, 8  ;   <-----  A really small trip count.
   br i1 %exitcond, label %8, label %1
 
 ; <label>:8                                       ; preds = %1
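
The kernel in small-loop.ll is the same a[i] = b[i] + c[i] loop as example1, only with a trip count of 8; the CHECK-NOT above expects the vectorizer to leave such a short loop scalar. A hypothetical C++ equivalent:

    // Hypothetical source of the reduced test: too few iterations to vectorize.
    int a[2048], b[2048], c[2048];
    void small_loop() {
      for (int i = 0; i < 8; ++i)   // really small trip count
        a[i] = b[i] + c[i];
    }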

Added: llvm/branches/R600/test/Transforms/LoopVectorize/start-non-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/start-non-zero.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/start-non-zero.ll (added)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/start-non-zero.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,35 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @start_at_nonzero
+;CHECK: mul nuw <4 x i32>
+;CHECK: ret i32
+define i32 @start_at_nonzero(i32* nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+  %cmp3 = icmp slt i32 %start, %end
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = sext i32 %start to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4, !tbaa !0
+  %mul = mul nuw i32 %1, 333
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %2 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %2, %end
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i32 4
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}

Copied: llvm/branches/R600/test/Transforms/LoopVectorize/write-only.ll (from r167837, llvm/branches/R600/test/Transforms/LoopVectorize/induction_plus.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopVectorize/write-only.ll?p2=llvm/branches/R600/test/Transforms/LoopVectorize/write-only.ll&p1=llvm/branches/R600/test/Transforms/LoopVectorize/induction_plus.ll&r1=167837&r2=167838&rev=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopVectorize/induction_plus.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopVectorize/write-only.ll Tue Nov 13 09:21:47 2012
@@ -1,25 +1,21 @@
-; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
- at array = common global [1024 x i32] zeroinitializer, align 16
-
-;CHECK: @array_at_plus_one
-;CHECK: add <4 x i64>
-;CHECK: trunc <4 x i64>
-;CHECK: add i64 %index, 12
+;CHECK: @read_mod_write_single_ptr
+;CHECK: load <4 x float>
 ;CHECK: ret i32
-define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
+define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %2 = add nsw i64 %indvars.iv, 12
-  %3 = getelementptr inbounds [1024 x i32]* @array, i64 0, i64 %2
-  %4 = trunc i64 %indvars.iv to i32
-  store i32 %4, i32* %3, align 4
+  %2 = getelementptr inbounds float* %a, i64 %indvars.iv
+  %3 = load float* %2, align 4
+  %4 = fmul float %3, 3.000000e+00
+  store float %4, float* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n

Modified: llvm/branches/R600/test/Transforms/SROA/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SROA/basictest.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SROA/basictest.ll (original)
+++ llvm/branches/R600/test/Transforms/SROA/basictest.ll Tue Nov 13 09:21:47 2012
@@ -577,9 +577,17 @@
   %ai = load i24* %aiptr
; CHECK-NOT: store
; CHECK-NOT: load
-; CHECK:      %[[mask0:.*]] = and i24 undef, -256
-; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
-; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], 65535
+; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]]
 
   %biptr = bitcast [3 x i8]* %b to i24*
   store i24 %ai, i24* %biptr
@@ -591,10 +599,10 @@
   %b2 = load i8* %b2ptr
; CHECK-NOT: store
; CHECK-NOT: load
-; CHECK:      %[[trunc0:.*]] = trunc i24 %[[mask2]] to i8
-; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
+; CHECK:      %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
-; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[mask2]], 16
+; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16
 ; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
 
   %bsum0 = add i8 %b0, %b1
@@ -1064,6 +1072,49 @@
   ret void
 }
 
+define i64 @PR14059.2({ float, float }* %phi) {
+; Check that SROA can split up alloca-wide integer loads and stores where the
+; underlying alloca has smaller components that are accessed independently. This
+; shows up particularly with ABI lowering patterns coming out of Clang that rely
+; on the particular register placement of a single large integer return value.
+; CHECK: @PR14059.2
+
+entry:
+  %retval = alloca { float, float }, align 4
+  ; CHECK-NOT: alloca
+
+  %0 = bitcast { float, float }* %retval to i64*
+  store i64 0, i64* %0
+  ; CHECK-NOT: store
+
+  %phi.realp = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
+  %phi.real = load float* %phi.realp
+  %phi.imagp = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
+  %phi.imag = load float* %phi.imagp
+  ; CHECK:      %[[realp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
+  ; CHECK-NEXT: %[[real:.*]] = load float* %[[realp]]
+  ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
+  ; CHECK-NEXT: %[[imag:.*]] = load float* %[[imagp]]
+
+  %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
+  %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+  store float %phi.real, float* %real
+  store float %phi.imag, float* %imag
+  ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
+  ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
+  ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
+  ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
+  ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
+  ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
+  ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
+  ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
+  ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
+
+  %1 = load i64* %0, align 1
+  ret i64 %1
+  ; CHECK-NEXT: ret i64 %[[real_insert]]
+}
+
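
A hypothetical C++ source with the shape the PR14059.2 comment describes, where individual float fields are stored into a local aggregate that is then read back as one wide integer, would be something like the following (the names and the memcpy framing are illustrative, not taken from the original PR):

    #include <cstdint>
    #include <cstring>

    struct cf { float re, im; };

    // Field-by-field stores into a local, followed by a single 64-bit read of
    // the whole object -- the alloca-wide integer load SROA has to split up.
    std::uint64_t pack(const cf *phi) {
      cf retval = {0.0f, 0.0f};        // the initial i64 0 store over the alloca
      retval.re = phi->re;
      retval.im = phi->im;
      std::uint64_t out;
      std::memcpy(&out, &retval, sizeof out);
      return out;
    }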
 define void @PR14105({ [16 x i8] }* %ptr) {
; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its
 ; sign as negative. We use a volatile memcpy to ensure promotion never actually

Modified: llvm/branches/R600/test/Transforms/SROA/big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SROA/big-endian.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SROA/big-endian.ll (original)
+++ llvm/branches/R600/test/Transforms/SROA/big-endian.ll Tue Nov 13 09:21:47 2012
@@ -26,9 +26,17 @@
   %ai = load i24* %aiptr
; CHECK-NOT: store
; CHECK-NOT: load
-; CHECK:      %[[mask0:.*]] = and i24 undef, 65535
-; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[mask0]], -65281
-; CHECK-NEXT: %[[mask2:.*]] = and i24 %[[mask1]], -256
+; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]]
 
   %biptr = bitcast [3 x i8]* %b to i24*
   store i24 %ai, i24* %biptr
@@ -40,11 +48,11 @@
   %b2 = load i8* %b2ptr
; CHECK-NOT: store
; CHECK-NOT: load
-; CHECK:      %[[shift0:.*]] = lshr i24 %[[mask2]], 16
+; CHECK:      %[[shift0:.*]] = lshr i24 %[[insert0]], 16
 ; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
-; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[mask2]], 8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
 ; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
-; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[mask2]] to i8
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8
 
   %bsum0 = add i8 %b0, %b1
   %bsum1 = add i8 %bsum0, %b2
@@ -74,27 +82,26 @@
 
   %a0i16ptr = bitcast i8* %a0ptr to i16*
   store i16 1, i16* %a0i16ptr
-; CHECK:      %[[mask:.*]] = and i56 undef, 1099511627775
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776
+; CHECK:      %[[mask0:.*]] = and i16 1, -16
 
   %a1i4ptr = bitcast i8* %a1ptr to i4*
   store i4 1, i4* %a1i4ptr
-; CHECK:      %[[mask:.*]] = and i56 %[[or]], -16492674416641
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1099511627776
+; CHECK-NEXT: %[[insert0:.*]] = or i16 %[[mask0]], 1
 
   store i8 1, i8* %a2ptr
-; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1095216660481
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 4294967296
+; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, 4294967295
+; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296
 
   %a3i24ptr = bitcast i8* %a3ptr to i24*
   store i24 1, i24* %a3i24ptr
-; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -4294967041
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 256
+; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
+; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256
 
   %a2i40ptr = bitcast i8* %a2ptr to i40*
   store i40 1, i40* %a2i40ptr
-; CHECK-NEXT: %[[mask:.*]] = and i56 %[[or]], -1099511627776
-; CHECK-NEXT: %[[or:.*]] = or i56 %[[mask]], 1
+; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56
+; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
+; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
 
; CHECK-NOT: store
; CHECK-NOT: load
@@ -103,6 +110,10 @@
   %ai = load i56* %aiptr
   %ret = zext i56 %ai to i64
   ret i64 %ret
-; CHECK:      %[[ret:.*]] = zext i56 %[[or]] to i64
+; CHECK-NEXT: %[[ext4:.*]] = zext i16 %[[insert0]] to i56
+; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40
+; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775
+; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]]
+; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
 ; CHECK-NEXT: ret i64 %[[ret]]
 }

Modified: llvm/branches/R600/test/Transforms/SROA/vector-promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SROA/vector-promotion.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SROA/vector-promotion.ll (original)
+++ llvm/branches/R600/test/Transforms/SROA/vector-promotion.ll Tue Nov 13 09:21:47 2012
@@ -205,3 +205,18 @@
   %res = load i64* %addr, align 4
   ret i64 %res
 }
+
+define i32 @PR14212() {
+; CHECK: @PR14212
+; This caused a crash when "splitting" the load of the i32 in order to promote
+; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case.
+entry:
+  %retval = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  store <3 x i8> undef, <3 x i8>* %retval, align 4
+  %cast = bitcast <3 x i8>* %retval to i32*
+  %load = load i32* %cast, align 4
+  ret i32 %load
+; CHECK: ret i32
+}

Added: llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg (added)
+++ llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg Tue Nov 13 09:21:47 2012
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Sparc' in targets:
+    config.unsupported = True
+

Added: llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll (added)
+++ llvm/branches/R600/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll Tue Nov 13 09:21:47 2012
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S -mtriple=sparc-unknown-unknown | FileCheck %s
+
+; Check that switches are not turned into lookup tables, as this is not
+; considered profitable on the target.
+
+define i32 @f(i32 %c) nounwind uwtable readnone {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
+    i32 46, label %sw.bb4
+    i32 47, label %sw.bb5
+    i32 48, label %sw.bb6
+  ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+  %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+  ret i32 %retval.0
+
+; CHECK: @f
+; CHECK-NOT: getelementptr
+; CHECK: switch i32 %c
+}
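
In source terms @f is just a dense value-to-value mapping (hypothetical C++ reconstruction below, with the return values read off the phi above). When the transform is profitable, SimplifyCFG replaces such a switch with a load from a private constant table, as the @switch.table CHECK lines in the switch_to_lookup_table.ll test further down show; here the CHECK lines instead require the switch instruction to survive, because the SPARC target reports the lookup table as unprofitable.

    // Hypothetical source of @f: seven consecutive case values, dense enough
    // that the generic transform would turn it into a constant-table load.
    int f(int c) {
      switch (c) {
        case 42: return 55;
        case 43: return 123;
        case 44: return 0;
        case 45: return -1;
        case 46: return 27;
        case 47: return 62;
        case 48: return 1;
        default: return 15;
      }
    }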

Modified: llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll Tue Nov 13 09:21:47 2012
@@ -15,12 +15,21 @@
 ; The table for @earlyreturncrash
 ; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
 
+; The table for @large.
+; CHECK: @switch.table4 = private unnamed_addr constant [199 x i32] [i32 1, i32 4, i32 9,
+
+; The table for @cprop
+; CHECK: @switch.table5 = private unnamed_addr constant [7 x i32] [i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7]
+
+; The table for @unreachable
+; CHECK: @switch.table6 = private unnamed_addr constant [5 x i32] [i32 0, i32 0, i32 0, i32 1, i32 -1]
+
 ; A simple int-to-int selection switch.
 ; It is dense enough to be replaced by table lookup.
 ; The result is used directly by a ret from an otherwise empty bb,
 ; so we return early, directly from the lookup bb.
 
-define i32 @f(i32 %c) nounwind uwtable readnone {
+define i32 @f(i32 %c) {
 entry:
   switch i32 %c, label %sw.default [
     i32 42, label %return
@@ -233,7 +242,7 @@
 }
 
 ; PR13946
-define i32 @overflow(i32 %type) nounwind {
+define i32 @overflow(i32 %type) {
 entry:
   switch i32 %type, label %sw.default [
     i32 -2147483648, label %sw.bb
@@ -244,23 +253,12 @@
     i32 3, label %sw.bb3
   ]
 
-sw.bb:
-  br label %if.end
-
-sw.bb1:
-  br label %if.end
-
-sw.bb2:
-  br label %if.end
-
-sw.bb3:
-  br label %if.end
-
-sw.default:
-  br label %if.end
-
-if.else:
-  br label %if.end
+sw.bb: br label %if.end
+sw.bb1: br label %if.end
+sw.bb2: br label %if.end
+sw.bb3: br label %if.end
+sw.default: br label %if.end
+if.else: br label %if.end
 
 if.end:
   %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ]
@@ -271,7 +269,7 @@
 }
 
 ; PR13985
-define i1 @undef(i32 %tmp) uwtable ssp {
+define i1 @undef(i32 %tmp) {
 bb:
   switch i32 %tmp, label %bb3 [
     i32 0, label %bb1
@@ -280,16 +278,502 @@
     i32 8, label %bb2
   ]
 
-bb1:                                              ; preds = %bb, %bb
-  br label %bb3
-
-bb2:                                              ; preds = %bb, %bb
-  br label %bb3
+bb1: br label %bb3
+bb2: br label %bb3
 
-bb3:                                              ; preds = %bb2, %bb1, %bb
+bb3:
   %tmp4 = phi i1 [ undef, %bb ], [ false, %bb2 ], [ true, %bb1 ]
   ret i1 %tmp4
 ; CHECK: define i1 @undef
 ; CHECK: %switch.cast = trunc i32 %switch.tableidx to i9
 ; CHECK: %switch.downshift = lshr i9 3, %switch.shiftamt
 }
+
+; Also handle large switches that would be rejected by
+; isValueEqualityComparison()
+; CHECK: large
+; CHECK-NOT: switch i32
+define i32 @large(i32 %x) {
+entry:
+  %cmp = icmp slt i32 %x, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %mul = mul i32 %x, -10
+  br label %if.end
+
+if.end:
+  %x.addr.0 = phi i32 [ %mul, %if.then ], [ %x, %entry ]
+  switch i32 %x.addr.0, label %return [
+    i32 199, label %sw.bb203
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+    i32 4, label %sw.bb4
+    i32 5, label %sw.bb5
+    i32 6, label %sw.bb6
+    i32 7, label %sw.bb7
+    i32 8, label %sw.bb8
+    i32 9, label %sw.bb9
+    i32 10, label %sw.bb10
+    i32 11, label %sw.bb11
+    i32 12, label %sw.bb12
+    i32 13, label %sw.bb13
+    i32 14, label %sw.bb14
+    i32 15, label %sw.bb15
+    i32 16, label %sw.bb16
+    i32 17, label %sw.bb17
+    i32 18, label %sw.bb18
+    i32 19, label %sw.bb19
+    i32 20, label %sw.bb20
+    i32 21, label %sw.bb21
+    i32 22, label %sw.bb22
+    i32 23, label %sw.bb23
+    i32 24, label %sw.bb24
+    i32 25, label %sw.bb25
+    i32 26, label %sw.bb26
+    i32 27, label %sw.bb27
+    i32 28, label %sw.bb28
+    i32 29, label %sw.bb29
+    i32 30, label %sw.bb30
+    i32 31, label %sw.bb31
+    i32 32, label %sw.bb32
+    i32 33, label %sw.bb33
+    i32 34, label %sw.bb34
+    i32 35, label %sw.bb35
+    i32 36, label %sw.bb37
+    i32 37, label %sw.bb38
+    i32 38, label %sw.bb39
+    i32 39, label %sw.bb40
+    i32 40, label %sw.bb41
+    i32 41, label %sw.bb42
+    i32 42, label %sw.bb43
+    i32 43, label %sw.bb44
+    i32 44, label %sw.bb45
+    i32 45, label %sw.bb47
+    i32 46, label %sw.bb48
+    i32 47, label %sw.bb49
+    i32 48, label %sw.bb50
+    i32 49, label %sw.bb51
+    i32 50, label %sw.bb52
+    i32 51, label %sw.bb53
+    i32 52, label %sw.bb54
+    i32 53, label %sw.bb55
+    i32 54, label %sw.bb56
+    i32 55, label %sw.bb58
+    i32 56, label %sw.bb59
+    i32 57, label %sw.bb60
+    i32 58, label %sw.bb61
+    i32 59, label %sw.bb62
+    i32 60, label %sw.bb63
+    i32 61, label %sw.bb64
+    i32 62, label %sw.bb65
+    i32 63, label %sw.bb66
+    i32 64, label %sw.bb67
+    i32 65, label %sw.bb68
+    i32 66, label %sw.bb69
+    i32 67, label %sw.bb70
+    i32 68, label %sw.bb71
+    i32 69, label %sw.bb72
+    i32 70, label %sw.bb73
+    i32 71, label %sw.bb74
+    i32 72, label %sw.bb76
+    i32 73, label %sw.bb77
+    i32 74, label %sw.bb78
+    i32 75, label %sw.bb79
+    i32 76, label %sw.bb80
+    i32 77, label %sw.bb81
+    i32 78, label %sw.bb82
+    i32 79, label %sw.bb83
+    i32 80, label %sw.bb84
+    i32 81, label %sw.bb85
+    i32 82, label %sw.bb86
+    i32 83, label %sw.bb87
+    i32 84, label %sw.bb88
+    i32 85, label %sw.bb89
+    i32 86, label %sw.bb90
+    i32 87, label %sw.bb91
+    i32 88, label %sw.bb92
+    i32 89, label %sw.bb93
+    i32 90, label %sw.bb94
+    i32 91, label %sw.bb95
+    i32 92, label %sw.bb96
+    i32 93, label %sw.bb97
+    i32 94, label %sw.bb98
+    i32 95, label %sw.bb99
+    i32 96, label %sw.bb100
+    i32 97, label %sw.bb101
+    i32 98, label %sw.bb102
+    i32 99, label %sw.bb103
+    i32 100, label %sw.bb104
+    i32 101, label %sw.bb105
+    i32 102, label %sw.bb106
+    i32 103, label %sw.bb107
+    i32 104, label %sw.bb108
+    i32 105, label %sw.bb109
+    i32 106, label %sw.bb110
+    i32 107, label %sw.bb111
+    i32 108, label %sw.bb112
+    i32 109, label %sw.bb113
+    i32 110, label %sw.bb114
+    i32 111, label %sw.bb115
+    i32 112, label %sw.bb116
+    i32 113, label %sw.bb117
+    i32 114, label %sw.bb118
+    i32 115, label %sw.bb119
+    i32 116, label %sw.bb120
+    i32 117, label %sw.bb121
+    i32 118, label %sw.bb122
+    i32 119, label %sw.bb123
+    i32 120, label %sw.bb124
+    i32 121, label %sw.bb125
+    i32 122, label %sw.bb126
+    i32 123, label %sw.bb127
+    i32 124, label %sw.bb128
+    i32 125, label %sw.bb129
+    i32 126, label %sw.bb130
+    i32 127, label %sw.bb131
+    i32 128, label %sw.bb132
+    i32 129, label %sw.bb133
+    i32 130, label %sw.bb134
+    i32 131, label %sw.bb135
+    i32 132, label %sw.bb136
+    i32 133, label %sw.bb137
+    i32 134, label %sw.bb138
+    i32 135, label %sw.bb139
+    i32 136, label %sw.bb140
+    i32 137, label %sw.bb141
+    i32 138, label %sw.bb142
+    i32 139, label %sw.bb143
+    i32 140, label %sw.bb144
+    i32 141, label %sw.bb145
+    i32 142, label %sw.bb146
+    i32 143, label %sw.bb147
+    i32 144, label %sw.bb148
+    i32 145, label %sw.bb149
+    i32 146, label %sw.bb150
+    i32 147, label %sw.bb151
+    i32 148, label %sw.bb152
+    i32 149, label %sw.bb153
+    i32 150, label %sw.bb154
+    i32 151, label %sw.bb155
+    i32 152, label %sw.bb156
+    i32 153, label %sw.bb157
+    i32 154, label %sw.bb158
+    i32 155, label %sw.bb159
+    i32 156, label %sw.bb160
+    i32 157, label %sw.bb161
+    i32 158, label %sw.bb162
+    i32 159, label %sw.bb163
+    i32 160, label %sw.bb164
+    i32 161, label %sw.bb165
+    i32 162, label %sw.bb166
+    i32 163, label %sw.bb167
+    i32 164, label %sw.bb168
+    i32 165, label %sw.bb169
+    i32 166, label %sw.bb170
+    i32 167, label %sw.bb171
+    i32 168, label %sw.bb172
+    i32 169, label %sw.bb173
+    i32 170, label %sw.bb174
+    i32 171, label %sw.bb175
+    i32 172, label %sw.bb176
+    i32 173, label %sw.bb177
+    i32 174, label %sw.bb178
+    i32 175, label %sw.bb179
+    i32 176, label %sw.bb180
+    i32 177, label %sw.bb181
+    i32 178, label %sw.bb182
+    i32 179, label %sw.bb183
+    i32 180, label %sw.bb184
+    i32 181, label %sw.bb185
+    i32 182, label %sw.bb186
+    i32 183, label %sw.bb187
+    i32 184, label %sw.bb188
+    i32 185, label %sw.bb189
+    i32 186, label %sw.bb190
+    i32 187, label %sw.bb191
+    i32 188, label %sw.bb192
+    i32 189, label %sw.bb193
+    i32 190, label %sw.bb194
+    i32 191, label %sw.bb195
+    i32 192, label %sw.bb196
+    i32 193, label %sw.bb197
+    i32 194, label %sw.bb198
+    i32 195, label %sw.bb199
+    i32 196, label %sw.bb200
+    i32 197, label %sw.bb201
+    i32 198, label %sw.bb202
+  ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.bb7: br label %return
+sw.bb8: br label %return
+sw.bb9: br label %return
+sw.bb10: br label %return
+sw.bb11: br label %return
+sw.bb12: br label %return
+sw.bb13: br label %return
+sw.bb14: br label %return
+sw.bb15: br label %return
+sw.bb16: br label %return
+sw.bb17: br label %return
+sw.bb18: br label %return
+sw.bb19: br label %return
+sw.bb20: br label %return
+sw.bb21: br label %return
+sw.bb22: br label %return
+sw.bb23: br label %return
+sw.bb24: br label %return
+sw.bb25: br label %return
+sw.bb26: br label %return
+sw.bb27: br label %return
+sw.bb28: br label %return
+sw.bb29: br label %return
+sw.bb30: br label %return
+sw.bb31: br label %return
+sw.bb32: br label %return
+sw.bb33: br label %return
+sw.bb34: br label %return
+sw.bb35: br label %return
+sw.bb37: br label %return
+sw.bb38: br label %return
+sw.bb39: br label %return
+sw.bb40: br label %return
+sw.bb41: br label %return
+sw.bb42: br label %return
+sw.bb43: br label %return
+sw.bb44: br label %return
+sw.bb45: br label %return
+sw.bb47: br label %return
+sw.bb48: br label %return
+sw.bb49: br label %return
+sw.bb50: br label %return
+sw.bb51: br label %return
+sw.bb52: br label %return
+sw.bb53: br label %return
+sw.bb54: br label %return
+sw.bb55: br label %return
+sw.bb56: br label %return
+sw.bb58: br label %return
+sw.bb59: br label %return
+sw.bb60: br label %return
+sw.bb61: br label %return
+sw.bb62: br label %return
+sw.bb63: br label %return
+sw.bb64: br label %return
+sw.bb65: br label %return
+sw.bb66: br label %return
+sw.bb67: br label %return
+sw.bb68: br label %return
+sw.bb69: br label %return
+sw.bb70: br label %return
+sw.bb71: br label %return
+sw.bb72: br label %return
+sw.bb73: br label %return
+sw.bb74: br label %return
+sw.bb76: br label %return
+sw.bb77: br label %return
+sw.bb78: br label %return
+sw.bb79: br label %return
+sw.bb80: br label %return
+sw.bb81: br label %return
+sw.bb82: br label %return
+sw.bb83: br label %return
+sw.bb84: br label %return
+sw.bb85: br label %return
+sw.bb86: br label %return
+sw.bb87: br label %return
+sw.bb88: br label %return
+sw.bb89: br label %return
+sw.bb90: br label %return
+sw.bb91: br label %return
+sw.bb92: br label %return
+sw.bb93: br label %return
+sw.bb94: br label %return
+sw.bb95: br label %return
+sw.bb96: br label %return
+sw.bb97: br label %return
+sw.bb98: br label %return
+sw.bb99: br label %return
+sw.bb100: br label %return
+sw.bb101: br label %return
+sw.bb102: br label %return
+sw.bb103: br label %return
+sw.bb104: br label %return
+sw.bb105: br label %return
+sw.bb106: br label %return
+sw.bb107: br label %return
+sw.bb108: br label %return
+sw.bb109: br label %return
+sw.bb110: br label %return
+sw.bb111: br label %return
+sw.bb112: br label %return
+sw.bb113: br label %return
+sw.bb114: br label %return
+sw.bb115: br label %return
+sw.bb116: br label %return
+sw.bb117: br label %return
+sw.bb118: br label %return
+sw.bb119: br label %return
+sw.bb120: br label %return
+sw.bb121: br label %return
+sw.bb122: br label %return
+sw.bb123: br label %return
+sw.bb124: br label %return
+sw.bb125: br label %return
+sw.bb126: br label %return
+sw.bb127: br label %return
+sw.bb128: br label %return
+sw.bb129: br label %return
+sw.bb130: br label %return
+sw.bb131: br label %return
+sw.bb132: br label %return
+sw.bb133: br label %return
+sw.bb134: br label %return
+sw.bb135: br label %return
+sw.bb136: br label %return
+sw.bb137: br label %return
+sw.bb138: br label %return
+sw.bb139: br label %return
+sw.bb140: br label %return
+sw.bb141: br label %return
+sw.bb142: br label %return
+sw.bb143: br label %return
+sw.bb144: br label %return
+sw.bb145: br label %return
+sw.bb146: br label %return
+sw.bb147: br label %return
+sw.bb148: br label %return
+sw.bb149: br label %return
+sw.bb150: br label %return
+sw.bb151: br label %return
+sw.bb152: br label %return
+sw.bb153: br label %return
+sw.bb154: br label %return
+sw.bb155: br label %return
+sw.bb156: br label %return
+sw.bb157: br label %return
+sw.bb158: br label %return
+sw.bb159: br label %return
+sw.bb160: br label %return
+sw.bb161: br label %return
+sw.bb162: br label %return
+sw.bb163: br label %return
+sw.bb164: br label %return
+sw.bb165: br label %return
+sw.bb166: br label %return
+sw.bb167: br label %return
+sw.bb168: br label %return
+sw.bb169: br label %return
+sw.bb170: br label %return
+sw.bb171: br label %return
+sw.bb172: br label %return
+sw.bb173: br label %return
+sw.bb174: br label %return
+sw.bb175: br label %return
+sw.bb176: br label %return
+sw.bb177: br label %return
+sw.bb178: br label %return
+sw.bb179: br label %return
+sw.bb180: br label %return
+sw.bb181: br label %return
+sw.bb182: br label %return
+sw.bb183: br label %return
+sw.bb184: br label %return
+sw.bb185: br label %return
+sw.bb186: br label %return
+sw.bb187: br label %return
+sw.bb188: br label %return
+sw.bb189: br label %return
+sw.bb190: br label %return
+sw.bb191: br label %return
+sw.bb192: br label %return
+sw.bb193: br label %return
+sw.bb194: br label %return
+sw.bb195: br label %return
+sw.bb196: br label %return
+sw.bb197: br label %return
+sw.bb198: br label %return
+sw.bb199: br label %return
+sw.bb200: br label %return
+sw.bb201: br label %return
+sw.bb202: br label %return
+sw.bb203: br label %return
+
+return:
+  %retval.0 = phi i32 [ 39204, %sw.bb202 ], [ 38809, %sw.bb201 ], [ 38416, %sw.bb200 ], [ 38025, %sw.bb199 ], [ 37636, %sw.bb198 ], [ 37249, %sw.bb197 ], [ 36864, %sw.bb196 ], [ 36481, %sw.bb195 ], [ 36100, %sw.bb194 ], [ 35721, %sw.bb193 ], [ 35344, %sw.bb192 ], [ 34969, %sw.bb191 ], [ 34596, %sw.bb190 ], [ 34225, %sw.bb189 ], [ 33856, %sw.bb188 ], [ 33489, %sw.bb187 ], [ 33124, %sw.bb186 ], [ 32761, %sw.bb185 ], [ 32400, %sw.bb184 ], [ 32041, %sw.bb183 ], [ 31684, %sw.bb182 ], [ 31329, %sw.bb181 ], [ 30976, %sw.bb180 ], [ 30625, %sw.bb179 ], [ 30276, %sw.bb178 ], [ 29929, %sw.bb177 ], [ 29584, %sw.bb176 ], [ 29241, %sw.bb175 ], [ 28900, %sw.bb174 ], [ 28561, %sw.bb173 ], [ 28224, %sw.bb172 ], [ 27889, %sw.bb171 ], [ 27556, %sw.bb170 ], [ 27225, %sw.bb169 ], [ 26896, %sw.bb168 ], [ 26569, %sw.bb167 ], [ 26244, %sw.bb166 ], [ 25921, %sw.bb165 ], [ 25600, %sw.bb164 ], [ 25281, %sw.bb163 ], [ 24964, %sw.bb162 ], [ 24649, %sw.bb161 ], [ 24336, %sw.bb160 ], [ 24025, %sw.bb159 ],
  [ 23716, %sw.bb158 ], [ 23409, %sw.bb157 ], [ 23104, %sw.bb156 ], [ 22801, %sw.bb155 ], [ 22500, %sw.bb154 ], [ 22201, %sw.bb153 ], [ 21904, %sw.bb152 ], [ 21609, %sw.bb151 ], [ 21316, %sw.bb150 ], [ 21025, %sw.bb149 ], [ 20736, %sw.bb148 ], [ 20449, %sw.bb147 ], [ 20164, %sw.bb146 ], [ 19881, %sw.bb145 ], [ 19600, %sw.bb144 ], [ 19321, %sw.bb143 ], [ 19044, %sw.bb142 ], [ 18769, %sw.bb141 ], [ 18496, %sw.bb140 ], [ 18225, %sw.bb139 ], [ 17956, %sw.bb138 ], [ 17689, %sw.bb137 ], [ 17424, %sw.bb136 ], [ 17161, %sw.bb135 ], [ 16900, %sw.bb134 ], [ 16641, %sw.bb133 ], [ 16384, %sw.bb132 ], [ 16129, %sw.bb131 ], [ 15876, %sw.bb130 ], [ 15625, %sw.bb129 ], [ 15376, %sw.bb128 ], [ 15129, %sw.bb127 ], [ 14884, %sw.bb126 ], [ 14641, %sw.bb125 ], [ 14400, %sw.bb124 ], [ 14161, %sw.bb123 ], [ 13924, %sw.bb122 ], [ 13689, %sw.bb121 ], [ 13456, %sw.bb120 ], [ 13225, %sw.bb119 ], [ 12996, %sw.bb118 ], [ 12769, %sw.bb117 ], [ 12544, %sw.bb116 ], [ 12321, %sw.bb115 ], [ 12100, %sw.bb114 ]
 , [ 11881, %sw.bb113 ], [ 11664, %sw.bb112 ], [ 11449, %sw.bb111 ], [ 11236, %sw.bb110 ], [ 11025, %sw.bb109 ], [ 10816, %sw.bb108 ], [ 10609, %sw.bb107 ], [ 10404, %sw.bb106 ], [ 10201, %sw.bb105 ], [ 10000, %sw.bb104 ], [ 9801, %sw.bb103 ], [ 9604, %sw.bb102 ], [ 9409, %sw.bb101 ], [ 9216, %sw.bb100 ], [ 9025, %sw.bb99 ], [ 8836, %sw.bb98 ], [ 8649, %sw.bb97 ], [ 8464, %sw.bb96 ], [ 8281, %sw.bb95 ], [ 8100, %sw.bb94 ], [ 7921, %sw.bb93 ], [ 7744, %sw.bb92 ], [ 7569, %sw.bb91 ], [ 7396, %sw.bb90 ], [ 7225, %sw.bb89 ], [ 7056, %sw.bb88 ], [ 6889, %sw.bb87 ], [ 6724, %sw.bb86 ], [ 6561, %sw.bb85 ], [ 6400, %sw.bb84 ], [ 6241, %sw.bb83 ], [ 6084, %sw.bb82 ], [ 5929, %sw.bb81 ], [ 5776, %sw.bb80 ], [ 5625, %sw.bb79 ], [ 5476, %sw.bb78 ], [ 5329, %sw.bb77 ], [ 5184, %sw.bb76 ], [ 5112, %sw.bb74 ], [ 4900, %sw.bb73 ], [ 4761, %sw.bb72 ], [ 4624, %sw.bb71 ], [ 4489, %sw.bb70 ], [ 4356, %sw.bb69 ], [ 4225, %sw.bb68 ], [ 4096, %sw.bb67 ], [ 3969, %sw.bb66 ], [ 3844, %sw.bb65 ], [ 3
 721, %sw.bb64 ], [ 3600, %sw.bb63 ], [ 3481, %sw.bb62 ], [ 3364, %sw.bb61 ], [ 3249, %sw.bb60 ], [ 3136, %sw.bb59 ], [ 3025, %sw.bb58 ], [ 2970, %sw.bb56 ], [ 2809, %sw.bb55 ], [ 2704, %sw.bb54 ], [ 2601, %sw.bb53 ], [ 2500, %sw.bb52 ], [ 2401, %sw.bb51 ], [ 2304, %sw.bb50 ], [ 2209, %sw.bb49 ], [ 2116, %sw.bb48 ], [ 2025, %sw.bb47 ], [ 1980, %sw.bb45 ], [ 1849, %sw.bb44 ], [ 1764, %sw.bb43 ], [ 1681, %sw.bb42 ], [ 1600, %sw.bb41 ], [ 1521, %sw.bb40 ], [ 1444, %sw.bb39 ], [ 1369, %sw.bb38 ], [ 1296, %sw.bb37 ], [ 1260, %sw.bb35 ], [ 1156, %sw.bb34 ], [ 1089, %sw.bb33 ], [ 1024, %sw.bb32 ], [ 961, %sw.bb31 ], [ 900, %sw.bb30 ], [ 841, %sw.bb29 ], [ 784, %sw.bb28 ], [ 729, %sw.bb27 ], [ 676, %sw.bb26 ], [ 625, %sw.bb25 ], [ 576, %sw.bb24 ], [ 529, %sw.bb23 ], [ 484, %sw.bb22 ], [ 441, %sw.bb21 ], [ 400, %sw.bb20 ], [ 361, %sw.bb19 ], [ 342, %sw.bb18 ], [ 289, %sw.bb17 ], [ 256, %sw.bb16 ], [ 225, %sw.bb15 ], [ 196, %sw.bb14 ], [ 169, %sw.bb13 ], [ 144, %sw.bb12 ], [ 121, %sw.b
 b11 ], [ 100, %sw.bb10 ], [ 81, %sw.bb9 ], [ 64, %sw.bb8 ], [ 49, %sw.bb7 ], [ 36, %sw.bb6 ], [ 25, %sw.bb5 ], [ 16, %sw.bb4 ], [ 9, %sw.bb3 ], [ 4, %sw.bb2 ], [ 1, %sw.bb1 ], [ 39601, %sw.bb203 ], [ 0, %if.end ]
+  ret i32 %retval.0
+}
+
+define i32 @cprop(i32 %x) {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 1, label %return
+    i32 2, label %sw.bb1
+    i32 3, label %sw.bb2
+    i32 4, label %sw.bb2
+    i32 5, label %sw.bb2
+    i32 6, label %sw.bb3
+    i32 7, label %sw.bb3
+  ]
+
+sw.bb1: br label %return
+
+sw.bb2:
+  %and = and i32 %x, 1
+  %tobool = icmp ne i32 %and, 0
+  %cond = select i1 %tobool, i32 -123, i32 456
+  %sub = sub nsw i32 %x, %cond
+  br label %return
+
+sw.bb3:
+  %trunc = trunc i32 %x to i8
+  %sext = sext i8 %trunc to i32
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 123, %sw.default ], [ %sext, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
+  ret i32 %retval.0
+
+; CHECK: @cprop
+; CHECK: switch.lookup:
+; CHECK: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table5, i32 0, i32 %switch.tableidx
+}
+
+define i32 @unreachable(i32 %x)  {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb
+    i32 3, label %sw.bb1
+    i32 4, label %sw.bb2
+    i32 5, label %sw.bb3
+    i32 6, label %sw.bb3
+    i32 7, label %sw.bb3
+    i32 8, label %sw.bb3
+  ]
+
+sw.bb: br label %return
+sw.bb1: unreachable
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: unreachable
+
+return:
+  %retval.0 = phi i32 [ 1, %sw.bb3 ], [ -1, %sw.bb2 ], [ 0, %sw.bb ]
+  ret i32 %retval.0
+
+; CHECK: @unreachable
+; CHECK: switch.lookup:
+; CHECK: getelementptr inbounds [5 x i32]* @switch.table6, i32 0, i32 %switch.tableidx
+}

Removed: llvm/branches/R600/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll (removed)
@@ -1,14 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; Test that we add nocapture to the declaration, and to the second call only.
-
-; CHECK: declare float @strtol(i8*, i8** nocapture, i32) nounwind
-declare float @strtol(i8* %s, i8** %endptr, i32 %base)
-
-define void @foo(i8* %x, i8** %endptr) {
-; CHECK:  call float @strtol(i8* %x, i8** %endptr, i32 10)
-  call float @strtol(i8* %x, i8** %endptr, i32 10)
-; CHECK: %2 = call float @strtol(i8* nocapture %x, i8** null, i32 10)
-  call float @strtol(i8* %x, i8** null, i32 10)
-  ret void
-}

Removed: llvm/branches/R600/test/Transforms/SimplifyLibCalls/StpCpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyLibCalls/StpCpy.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyLibCalls/StpCpy.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyLibCalls/StpCpy.ll (removed)
@@ -1,43 +0,0 @@
-; Test that the StpCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-@hello = constant [6 x i8] c"hello\00"
-
-declare i8* @stpcpy(i8*, i8*)
-
-declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
-define i32 @t1() {
-; CHECK: @t1
-  %target = alloca [1024 x i8]
-  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
-  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
-  %rslt1 = call i8* @stpcpy( i8* %arg1, i8* %arg2 )
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
-  ret i32 0
-}
-
-define i32 @t2() {
-; CHECK: @t2
-  %target = alloca [1024 x i8]
-  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
-  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
-  %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false)
-  %rslt1 = call i8* @__stpcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1)
-; CHECK: @__memcpy_chk
-  ret i32 0
-}
-
-define i8* @t3(i8* %arg) {
-; CHECK: @t3
-  %stpcpy = tail call i8* @stpcpy(i8* %arg, i8* %arg)
-; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen(i8* %arg)
-; CHECK-NEXT: getelementptr inbounds i8* %arg, i32 [[LEN]]
-  ret i8* %stpcpy
-}

Removed: llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrLen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrLen.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrLen.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrLen.ll (removed)
@@ -1,62 +0,0 @@
-; Test that the StrCatOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:    not grep "call.*strlen"
-
-target datalayout = "e-p:32:32"
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=3]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=3]
-@null_hello = constant [7 x i8] c"\00hello\00"		; <[7 x i8]*> [#uses=1]
-@nullstring = constant i8 0
-
-declare i32 @strlen(i8*)
-
-define i32 @test1() {
-	%hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%hello_l = call i32 @strlen( i8* %hello_p )		; <i32> [#uses=1]
-	ret i32 %hello_l
-}
-
-define i32 @test2() {
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%null_l = call i32 @strlen( i8* %null_p )		; <i32> [#uses=1]
-	ret i32 %null_l
-}
-
-define i32 @test3() {
-	%null_hello_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%null_hello_l = call i32 @strlen( i8* %null_hello_p )		; <i32> [#uses=1]
-	ret i32 %null_hello_l
-}
-
-define i1 @test4() {
-	%hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%hello_l = call i32 @strlen( i8* %hello_p )		; <i32> [#uses=1]
-	%eq_hello = icmp eq i32 %hello_l, 0		; <i1> [#uses=1]
-	ret i1 %eq_hello
-}
-
-define i1 @test5() {
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%null_l = call i32 @strlen( i8* %null_p )		; <i32> [#uses=1]
-	%eq_null = icmp eq i32 %null_l, 0		; <i1> [#uses=1]
-	ret i1 %eq_null
-}
-
-define i1 @test6() {
-	%hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%hello_l = call i32 @strlen( i8* %hello_p )		; <i32> [#uses=1]
-	%ne_hello = icmp ne i32 %hello_l, 0		; <i1> [#uses=1]
-	ret i1 %ne_hello
-}
-
-define i1 @test7() {
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%null_l = call i32 @strlen( i8* %null_p )		; <i32> [#uses=1]
-	%ne_null = icmp ne i32 %null_l, 0		; <i1> [#uses=1]
-	ret i1 %ne_null
-}
-
-define i32 @test8() {
-	%len = tail call i32 @strlen(i8* @nullstring) nounwind
-	ret i32 %len
-}

Removed: llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrNCpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrNCpy.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrNCpy.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrNCpy.ll (removed)
@@ -1,29 +0,0 @@
-; Test that the StrNCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*strncpy"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00"		; <[7 x i8]*> [#uses=1]
-
-declare i8* @strncpy(i8*, i8*, i32)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=2]
-	store i8 0, i8* %arg1
-	%arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt1 = call i8* @strncpy( i8* %arg1, i8* %arg2, i32 6 )		; <i8*> [#uses=1]
-	%arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt2 = call i8* @strncpy( i8* %rslt1, i8* %arg3, i32 42 )		; <i8*> [#uses=1]
-	%arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt3 = call i8* @strncpy( i8* %rslt2, i8* %arg4, i32 42 )		; <i8*> [#uses=1]
-	call i32 @puts( i8* %rslt3 )		; <i32>:1 [#uses=0]
-	ret i32 0
-}

Removed: llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrPBrk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrPBrk.ll?rev=167837&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrPBrk.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyLibCalls/StrPBrk.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@hello = constant [12 x i8] c"hello world\00"
-@w = constant [2 x i8] c"w\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strpbrk(i8*, i8*)
-
-define void @test(i8* %s1, i8* %s2) {
-	%hello_p = getelementptr [12 x i8]* @hello, i32 0, i32 0
-	%w_p = getelementptr [2 x i8]* @w, i32 0, i32 0
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
-	%test1 = call i8* @strpbrk(i8* %null_p, i8* %s2)
-	%test2 = call i8* @strpbrk(i8* %s1, i8* %null_p)
-; CHECK-NOT: call i8* @strpbrk
-	%test3 = call i8* @strpbrk(i8* %s1, i8* %w_p)
-; CHECK: call i8* @strchr(i8* %s1, i32 119)
-	%test4 = call i8* @strpbrk(i8* %hello_p, i8* %w_p)
-; CHECK: getelementptr i8* %hello_p, i64 6
-	%test5 = call i8* @strpbrk(i8* %s1, i8* %s2)
-; CHECK: call i8* @strpbrk(i8* %s1, i8* %s2)
-	ret void
-}

Modified: llvm/branches/R600/test/Transforms/TailCallElim/nocapture.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/TailCallElim/nocapture.ll?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/TailCallElim/nocapture.ll (original)
+++ llvm/branches/R600/test/Transforms/TailCallElim/nocapture.ll Tue Nov 13 09:21:47 2012
@@ -1,9 +1,9 @@
 ; RUN: opt %s -tailcallelim -S | FileCheck %s
+; XFAIL: *
 
 declare void @use(i8* nocapture, i8* nocapture)
-declare void @boring()
 
-define i8* @test1(i8* nocapture %A, i1 %cond) {
+define i8* @foo(i8* nocapture %A, i1 %cond) {
 ; CHECK: tailrecurse:
 ; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ]
 ; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ]
@@ -14,27 +14,12 @@
 cond_true:
 ; CHECK: cond_true:
 ; CHECK: br label %tailrecurse
-  call i8* @test1(i8* %B, i1 false)
+  call i8* @foo(i8* %B, i1 false)
   ret i8* null
 cond_false:
 ; CHECK: cond_false
   call void @use(i8* %A, i8* %B)
-; CHECK: call void @use(i8* %A.tr, i8* %B)
-  call void @boring()
-; CHECK: tail call void @boring()
+; CHECK: tail call void @use(i8* %A.tr, i8* %B)
   ret i8* null
 ; CHECK: ret i8* null
 }
-
-; PR14143
-define void @test2(i8* %a, i8* %b) {
-; CHECK: @test2
-; CHECK-NOT: tail call
-; CHECK: ret void
-  %c = alloca [100 x i8], align 16
-  %tmp = bitcast [100 x i8]* %c to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %tmp, i64 100, i32 1, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)

Modified: llvm/branches/R600/test/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/lit.cfg?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/test/lit.cfg (original)
+++ llvm/branches/R600/test/lit.cfg Tue Nov 13 09:21:47 2012
@@ -5,6 +5,7 @@
 import os
 import sys
 import re
+import platform
 
 # name: The name of this test suite.
 config.name = 'LLVM'
@@ -148,7 +149,9 @@
 
 # Provide a substitution for those tests that need to run the jit to obtain data
 # but simply want to use the currently considered most reliable jit for the platform
-if 'arm' in config.target_triple:
+# FIXME: ppc32 is not ready for mcjit.
+if 'arm' in config.target_triple \
+   or 'powerpc64' in config.target_triple:
     defaultIsMCJIT = 'true'
 else:
     defaultIsMCJIT = 'false'
@@ -241,9 +244,9 @@
 if loadable_module:
     config.available_features.add('loadable_module')
 
-# LTO
-if config.lto_is_enabled == "1":
-    config.available_features.add('lto')
+# LTO on OS X
+if config.lto_is_enabled == "1" and platform.system() == "Darwin":
+    config.available_features.add('lto_on_osx')
 
 # llc knows whether it is compiled with -DNDEBUG.
 import subprocess

Modified: llvm/branches/R600/tools/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -36,6 +36,7 @@
 add_subdirectory(bugpoint-passes)
 add_subdirectory(llvm-bcanalyzer)
 add_subdirectory(llvm-stress)
+add_subdirectory(llvm-mcmarkup)
 
 if( NOT WIN32 )
   add_subdirectory(lto)

Modified: llvm/branches/R600/tools/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/LLVMBuild.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/LLVMBuild.txt (original)
+++ llvm/branches/R600/tools/LLVMBuild.txt Tue Nov 13 09:21:47 2012
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt
+subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
 
 [component_0]
 type = Group

Modified: llvm/branches/R600/tools/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/Makefile?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/Makefile (original)
+++ llvm/branches/R600/tools/Makefile Tue Nov 13 09:21:47 2012
@@ -34,7 +34,7 @@
                  bugpoint llvm-bcanalyzer \
                  llvm-diff macho-dump llvm-objdump llvm-readobj \
 	         llvm-rtdyld llvm-dwarfdump llvm-cov \
-	         llvm-size llvm-stress
+	         llvm-size llvm-stress llvm-mcmarkup
 
 # Let users override the set of tools to build from the command line.
 ifdef ONLY_TOOLS

Modified: llvm/branches/R600/tools/gold/gold-plugin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/gold/gold-plugin.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/gold/gold-plugin.cpp (original)
+++ llvm/branches/R600/tools/gold/gold-plugin.cpp Tue Nov 13 09:21:47 2012
@@ -378,9 +378,6 @@
     }
   }
 
-  // If we don't preserve any symbols, libLTO will assume that all symbols are
-  // needed. Keep all symbols unless we're producing a final executable.
-  bool anySymbolsPreserved = false;
   for (std::list<claimed_file>::iterator I = Modules.begin(),
          E = Modules.end(); I != E; ++I) {
     if (I->syms.empty())
@@ -389,7 +386,6 @@
     for (unsigned i = 0, e = I->syms.size(); i != e; i++) {
       if (I->syms[i].resolution == LDPR_PREVAILING_DEF) {
         lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name);
-        anySymbolsPreserved = true;
 
         if (options::generate_api_file)
           api_file << I->syms[i].name << "\n";
@@ -400,12 +396,6 @@
   if (options::generate_api_file)
     api_file.close();
 
-  if (!anySymbolsPreserved) {
-    // All of the IL is unnecessary!
-    lto_codegen_dispose(code_gen);
-    return LDPS_OK;
-  }
-
   lto_codegen_set_pic_model(code_gen, output_type);
   lto_codegen_set_debug_model(code_gen, LTO_DEBUG_MODEL_DWARF);
   if (!options::mcpu.empty())

Modified: llvm/branches/R600/tools/lli/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lli/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/lli/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,5 +1,5 @@
 
-set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag)
+set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native)
 
 if( LLVM_USE_OPROFILE )
   set(LLVM_LINK_COMPONENTS

Modified: llvm/branches/R600/tools/lli/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lli/LLVMBuild.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/LLVMBuild.txt (original)
+++ llvm/branches/R600/tools/lli/LLVMBuild.txt Tue Nov 13 09:21:47 2012
@@ -19,4 +19,4 @@
 type = Tool
 name = lli
 parent = Tools
-required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG
+required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG Native

Modified: llvm/branches/R600/tools/lli/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lli/Makefile?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/Makefile (original)
+++ llvm/branches/R600/tools/lli/Makefile Tue Nov 13 09:21:47 2012
@@ -12,7 +12,7 @@
 
 include $(LEVEL)/Makefile.config
 
-LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native
 
 # If Intel JIT Events support is configured, link against the LLVM Intel JIT
 # Events interface library

Modified: llvm/branches/R600/tools/lli/RemoteTarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lli/RemoteTarget.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/RemoteTarget.cpp (original)
+++ llvm/branches/R600/tools/lli/RemoteTarget.cpp Tue Nov 13 09:21:47 2012
@@ -36,13 +36,13 @@
 
 bool RemoteTarget::loadData(uint64_t Address, const void *Data, size_t Size) {
   memcpy ((void*)Address, Data, Size);
-  sys::MemoryBlock Mem((void*)Address, Size);
-  sys::Memory::setExecutable(Mem, &ErrorMsg);
   return false;
 }
 
 bool RemoteTarget::loadCode(uint64_t Address, const void *Data, size_t Size) {
   memcpy ((void*)Address, Data, Size);
+  sys::MemoryBlock Mem((void*)Address, Size);
+  sys::Memory::setExecutable(Mem, &ErrorMsg);
   return false;
 }
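
The hunk above moves the sys::Memory::setExecutable() call from loadData() to loadCode(): only code regions need execute permission, while data regions can stay read/write. A minimal POSIX-only sketch of that distinction, independent of the RemoteTarget and sys::Memory interfaces (hypothetical helper, assumes mmap/mprotect are available):

#include <cstddef>
#include <cstring>
#include <sys/mman.h>

// Copy a blob into freshly mapped memory; flip it to read+execute only when
// it is actually code. Data sections remain read/write.
static void *loadBlob(const void *Data, size_t Size, bool IsCode) {
  void *Mem = mmap(0, Size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (Mem == MAP_FAILED)
    return 0;
  std::memcpy(Mem, Data, Size);
  if (IsCode && mprotect(Mem, Size, PROT_READ | PROT_EXEC) != 0) {
    munmap(Mem, Size);
    return 0;
  }
  return Mem;
}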
 

Modified: llvm/branches/R600/tools/lli/lli.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lli/lli.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/lli.cpp (original)
+++ llvm/branches/R600/tools/lli/lli.cpp Tue Nov 13 09:21:47 2012
@@ -42,6 +42,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Memory.h"
+#include "llvm/Support/MathExtras.h"
 #include <cerrno>
 
 #ifdef __linux__
@@ -241,7 +242,7 @@
   // the data cache but not to the instruction cache.
   virtual void invalidateInstructionCache();
 
-  // The MCJITMemoryManager doesn't use the following functions, so we don't
+  // The RTDyldMemoryManager doesn't use the following functions, so we don't
   // need to implement them.
   virtual void setMemoryWritable() {
     llvm_unreachable("Unexpected call!");
@@ -303,9 +304,16 @@
                                                     unsigned SectionID) {
   if (!Alignment)
     Alignment = 16;
-  uint8_t *Addr = (uint8_t*)calloc((Size + Alignment - 1)/Alignment, Alignment);
-  AllocatedDataMem.push_back(sys::MemoryBlock(Addr, Size));
-  return Addr;
+  // Ensure that enough memory is requested to allow aligning.
+  size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment;
+  uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment);
+
+  // Honour the alignment requirement.
+  uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment);
+
+  // Store the original address from calloc so we can free it later.
+  AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment));
+  return AlignedAddr;
 }
 
 uint8_t *LLIMCJITMemoryManager::allocateCodeSection(uintptr_t Size,
@@ -464,9 +472,13 @@
     EE->mapSectionAddress(const_cast<void*>(Offsets[i].first), Addr);
 
     DEBUG(dbgs() << "  Mapping local: " << Offsets[i].first
-                 << " to remote: " << format("%#018x", Addr) << "\n");
+                 << " to remote: " << format("%p", Addr) << "\n");
 
   }
+
+  // Trigger application of relocations
+  EE->finalizeObject();
+
   // Now load it all to the target.
   for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
     uint64_t Addr = RemoteAddr + Offsets[i].second;
@@ -475,12 +487,12 @@
       T->loadCode(Addr, Offsets[i].first, Sizes[i]);
 
       DEBUG(dbgs() << "  loading code: " << Offsets[i].first
-            << " to remote: " << format("%#018x", Addr) << "\n");
+            << " to remote: " << format("%p", Addr) << "\n");
     } else {
       T->loadData(Addr, Offsets[i].first, Sizes[i]);
 
       DEBUG(dbgs() << "  loading data: " << Offsets[i].first
-            << " to remote: " << format("%#018x", Addr) << "\n");
+            << " to remote: " << format("%p", Addr) << "\n");
     }
 
   }
@@ -500,6 +512,7 @@
   // usable by the JIT.
   InitializeNativeTarget();
   InitializeNativeTargetAsmPrinter();
+  InitializeNativeTargetAsmParser();
 
   cl::ParseCommandLineOptions(argc, argv,
                               "llvm interpreter & dynamic compiler\n");
@@ -685,7 +698,7 @@
     uint64_t Entry = (uint64_t)EE->getPointerToFunction(EntryFn);
 
     DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at "
-                 << format("%#18x", Entry) << "\n");
+                 << format("%p", Entry) << "\n");
 
     if (Target.executeCode(Entry, Result))
       errs() << "ERROR: " << Target.getErrorMsg() << "\n";

Modified: llvm/branches/R600/tools/llvm-ar/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-ar/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-ar/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/llvm-ar/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS archive)
-set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvm-ar
   llvm-ar.cpp

Modified: llvm/branches/R600/tools/llvm-ar/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-ar/Makefile?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-ar/Makefile (original)
+++ llvm/branches/R600/tools/llvm-ar/Makefile Tue Nov 13 09:21:47 2012
@@ -10,7 +10,6 @@
 LEVEL := ../..
 TOOLNAME := llvm-ar
 LINK_COMPONENTS := archive
-REQUIRES_EH := 1
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS := 1

Modified: llvm/branches/R600/tools/llvm-ar/llvm-ar.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-ar/llvm-ar.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-ar/llvm-ar.cpp (original)
+++ llvm/branches/R600/tools/llvm-ar/llvm-ar.cpp Tue Nov 13 09:21:47 2012
@@ -23,6 +23,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
 #include <algorithm>
+#include <cstdlib>
 #include <memory>
 #include <fstream>
 using namespace llvm;
@@ -126,40 +127,57 @@
 // The Archive object to which all the editing operations will be sent.
 Archive* TheArchive = 0;
 
+// The name this program was invoked as.
+static const char *program_name;
+
+// show_help - Show the error message, the help message and exit.
+LLVM_ATTRIBUTE_NORETURN static void
+show_help(const std::string &msg) {
+  errs() << program_name << ": " << msg << "\n\n";
+  cl::PrintHelpMessage();
+  if (TheArchive)
+    delete TheArchive;
+  std::exit(1);
+}
+
+// fail - Show the error message and exit.
+LLVM_ATTRIBUTE_NORETURN static void
+fail(const std::string &msg) {
+  errs() << program_name << ": " << msg << "\n\n";
+  if (TheArchive)
+    delete TheArchive;
+  std::exit(1);
+}
+
 // getRelPos - Extract the member filename from the command line for
 // the [relpos] argument associated with a, b, and i modifiers
 void getRelPos() {
-  if(RestOfArgs.size() > 0) {
-    RelPos = RestOfArgs[0];
-    RestOfArgs.erase(RestOfArgs.begin());
-  }
-  else
-    throw "Expected [relpos] for a, b, or i modifier";
+  if(RestOfArgs.size() == 0)
+    show_help("Expected [relpos] for a, b, or i modifier");
+  RelPos = RestOfArgs[0];
+  RestOfArgs.erase(RestOfArgs.begin());
 }
 
 // getCount - Extract the [count] argument associated with the N modifier
 // from the command line and check its value.
 void getCount() {
-  if(RestOfArgs.size() > 0) {
-    Count = atoi(RestOfArgs[0].c_str());
-    RestOfArgs.erase(RestOfArgs.begin());
-  }
-  else
-    throw "Expected [count] value with N modifier";
+  if(RestOfArgs.size() == 0)
+    show_help("Expected [count] value with N modifier");
+
+  Count = atoi(RestOfArgs[0].c_str());
+  RestOfArgs.erase(RestOfArgs.begin());
 
   // Non-positive counts are not allowed
   if (Count < 1)
-    throw "Invalid [count] value (not a positive integer)";
+    show_help("Invalid [count] value (not a positive integer)");
 }
 
 // getArchive - Get the archive file name from the command line
 void getArchive() {
-  if(RestOfArgs.size() > 0) {
-    ArchiveName = RestOfArgs[0];
-    RestOfArgs.erase(RestOfArgs.begin());
-  }
-  else
-    throw "An archive name must be specified.";
+  if(RestOfArgs.size() == 0)
+    show_help("An archive name must be specified");
+  ArchiveName = RestOfArgs[0];
+  RestOfArgs.erase(RestOfArgs.begin());
 }
 
 // getMembers - Copy over remaining items in RestOfArgs to our Members vector
@@ -240,25 +258,27 @@
   // Perform various checks on the operation/modifier specification
   // to make sure we are dealing with a legal request.
   if (NumOperations == 0)
-    throw "You must specify at least one of the operations";
+    show_help("You must specify at least one of the operations");
   if (NumOperations > 1)
-    throw "Only one operation may be specified";
+    show_help("Only one operation may be specified");
   if (NumPositional > 1)
-    throw "You may only specify one of a, b, and i modifiers";
-  if (AddAfter || AddBefore || InsertBefore)
+    show_help("You may only specify one of a, b, and i modifiers");
+  if (AddAfter || AddBefore || InsertBefore) {
     if (Operation != Move && Operation != ReplaceOrInsert)
-      throw "The 'a', 'b' and 'i' modifiers can only be specified with "
-            "the 'm' or 'r' operations";
+      show_help("The 'a', 'b' and 'i' modifiers can only be specified with "
+            "the 'm' or 'r' operations");
+  }
   if (RecurseDirectories && Operation != ReplaceOrInsert)
-    throw "The 'R' modifiers is only applicabe to the 'r' operation";
+    show_help("The 'R' modifier is only applicable to the 'r' operation");
   if (OriginalDates && Operation != Extract)
-    throw "The 'o' modifier is only applicable to the 'x' operation";
+    show_help("The 'o' modifier is only applicable to the 'x' operation");
   if (TruncateNames && Operation!=QuickAppend && Operation!=ReplaceOrInsert)
-    throw "The 'f' modifier is only applicable to the 'q' and 'r' operations";
+    show_help("The 'f' modifier is only applicable to the 'q' and 'r' "
+              "operations");
   if (OnlyUpdate && Operation != ReplaceOrInsert)
-    throw "The 'u' modifier is only applicable to the 'r' operation";
+    show_help("The 'u' modifier is only applicable to the 'r' operation");
   if (Count > 1 && Members.size() > 1)
-    throw "Only one member name may be specified with the 'N' modifier";
+    show_help("Only one member name may be specified with the 'N' modifier");
 
   // Return the parsed operation to the caller
   return Operation;
@@ -304,16 +324,16 @@
   for (unsigned i = 0; i < Members.size(); i++) {
     sys::Path aPath;
     if (!aPath.set(Members[i]))
-      throw std::string("File member name invalid: ") + Members[i];
+      fail(std::string("File member name invalid: ") + Members[i]);
     if (checkExistence) {
       bool Exists;
       if (sys::fs::exists(aPath.str(), Exists) || !Exists)
-        throw std::string("File does not exist: ") + Members[i];
+        fail(std::string("File does not exist: ") + Members[i]);
       std::string Err;
       sys::PathWithStatus PwS(aPath);
       const sys::FileStatus *si = PwS.getFileStatus(false, &Err);
       if (!si)
-        throw Err;
+        fail(Err);
       if (si->isDir) {
         std::set<sys::Path> dirpaths;
         if (recurseDirectories(aPath, dirpaths, ErrMsg))
@@ -683,6 +703,7 @@
 
 // main - main program for llvm-ar .. see comments in the code
 int main(int argc, char **argv) {
+  program_name = argv[0];
   // Print a stack trace if we signal out.
   sys::PrintStackTraceOnErrorSignal();
   PrettyStackTraceProgram X(argc, argv);
@@ -698,76 +719,60 @@
 
   int exitCode = 0;
 
-  // Make sure we don't exit with "unhandled exception".
-  try {
-    // Do our own parsing of the command line because the CommandLine utility
-    // can't handle the grouped positional parameters without a dash.
-    ArchiveOperation Operation = parseCommandLine();
-
-    // Check the path name of the archive
-    sys::Path ArchivePath;
-    if (!ArchivePath.set(ArchiveName))
-      throw std::string("Archive name invalid: ") + ArchiveName;
-
-    // Create or open the archive object.
-    bool Exists;
-    if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists) {
-      // Produce a warning if we should and we're creating the archive
-      if (!Create)
-        errs() << argv[0] << ": creating " << ArchivePath.str() << "\n";
-      TheArchive = Archive::CreateEmpty(ArchivePath, Context);
-      TheArchive->writeToDisk();
-    } else {
-      std::string Error;
-      TheArchive = Archive::OpenAndLoad(ArchivePath, Context, &Error);
-      if (TheArchive == 0) {
-        errs() << argv[0] << ": error loading '" << ArchivePath.str() << "': "
-               << Error << "!\n";
-        return 1;
-      }
+  // Do our own parsing of the command line because the CommandLine utility
+  // can't handle the grouped positional parameters without a dash.
+  ArchiveOperation Operation = parseCommandLine();
+
+  // Check the path name of the archive
+  sys::Path ArchivePath;
+  if (!ArchivePath.set(ArchiveName)) {
+    errs() << argv[0] << ": Archive name invalid: " << ArchiveName << "\n";
+    return 1;
+  }
+
+  // Create or open the archive object.
+  bool Exists;
+  if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists) {
+    // Produce a warning if we should and we're creating the archive
+    if (!Create)
+      errs() << argv[0] << ": creating " << ArchivePath.str() << "\n";
+    TheArchive = Archive::CreateEmpty(ArchivePath, Context);
+    TheArchive->writeToDisk();
+  } else {
+    std::string Error;
+    TheArchive = Archive::OpenAndLoad(ArchivePath, Context, &Error);
+    if (TheArchive == 0) {
+      errs() << argv[0] << ": error loading '" << ArchivePath.str() << "': "
+             << Error << "!\n";
+      return 1;
     }
+  }
 
-    // Make sure we're not fooling ourselves.
-    assert(TheArchive && "Unable to instantiate the archive");
-
-    // Make sure we clean up the archive even on failure.
-    std::auto_ptr<Archive> AutoArchive(TheArchive);
+  // Make sure we're not fooling ourselves.
+  assert(TheArchive && "Unable to instantiate the archive");
 
-    // Perform the operation
-    std::string ErrMsg;
-    bool haveError = false;
-    switch (Operation) {
-      case Print:           haveError = doPrint(&ErrMsg); break;
-      case Delete:          haveError = doDelete(&ErrMsg); break;
-      case Move:            haveError = doMove(&ErrMsg); break;
-      case QuickAppend:     haveError = doQuickAppend(&ErrMsg); break;
-      case ReplaceOrInsert: haveError = doReplaceOrInsert(&ErrMsg); break;
-      case DisplayTable:    haveError = doDisplayTable(&ErrMsg); break;
-      case Extract:         haveError = doExtract(&ErrMsg); break;
-      case NoOperation:
-        errs() << argv[0] << ": No operation was selected.\n";
-        break;
-    }
-    if (haveError) {
-      errs() << argv[0] << ": " << ErrMsg << "\n";
-      return 1;
-    }
-  } catch (const char*msg) {
-    // These errors are usage errors, thrown only by the various checks in the
-    // code above.
-    errs() << argv[0] << ": " << msg << "\n\n";
-    cl::PrintHelpMessage();
-    exitCode = 1;
-  } catch (const std::string& msg) {
-    // These errors are thrown by LLVM libraries (e.g. lib System) and represent
-    // a more serious error so we bump the exitCode and don't print the usage.
-    errs() << argv[0] << ": " << msg << "\n";
-    exitCode = 2;
-  } catch (...) {
-    // This really shouldn't happen, but just in case ....
-    errs() << argv[0] << ": An unexpected unknown exception occurred.\n";
-    exitCode = 3;
+  // Perform the operation
+  std::string ErrMsg;
+  bool haveError = false;
+  switch (Operation) {
+    case Print:           haveError = doPrint(&ErrMsg); break;
+    case Delete:          haveError = doDelete(&ErrMsg); break;
+    case Move:            haveError = doMove(&ErrMsg); break;
+    case QuickAppend:     haveError = doQuickAppend(&ErrMsg); break;
+    case ReplaceOrInsert: haveError = doReplaceOrInsert(&ErrMsg); break;
+    case DisplayTable:    haveError = doDisplayTable(&ErrMsg); break;
+    case Extract:         haveError = doExtract(&ErrMsg); break;
+    case NoOperation:
+      errs() << argv[0] << ": No operation was selected.\n";
+      break;
   }
+  if (haveError) {
+    errs() << argv[0] << ": " << ErrMsg << "\n";
+    return 1;
+  }
+
+  delete TheArchive;
+  TheArchive = 0;
 
   // Return result code back to operating system.
   return exitCode;
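
The llvm-ar rewrite above replaces throw/catch usage-error handling with helpers (show_help, fail) that print a message, clean up, and exit. The shape of that pattern reduced to a standalone sketch with invented names ([[noreturn]] plays the role of LLVM_ATTRIBUTE_NORETURN here):

#include <cstdio>
#include <cstdlib>
#include <string>

// Report a usage error and terminate; marking the helper noreturn means
// callers need no unreachable fallback path after invoking it.
[[noreturn]] static void usageError(const std::string &Msg) {
  std::fprintf(stderr, "tool: %s\n\n", Msg.c_str());
  std::exit(1);
}

// Example caller: validate a count argument the way getCount() now does,
// reporting problems through usageError() instead of throwing.
static int parseCount(const char *Arg) {
  if (!Arg)
    usageError("Expected [count] value with N modifier");
  int Count = std::atoi(Arg);
  if (Count < 1)
    usageError("Invalid [count] value (not a positive integer)");
  return Count;
}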

Modified: llvm/branches/R600/tools/llvm-as/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-as/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-as/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/llvm-as/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS asmparser bitwriter)
-set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvm-as
   llvm-as.cpp

Modified: llvm/branches/R600/tools/llvm-bcanalyzer/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-bcanalyzer/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-bcanalyzer/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/llvm-bcanalyzer/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS bitreader)
-set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvm-bcanalyzer
   llvm-bcanalyzer.cpp

Modified: llvm/branches/R600/tools/llvm-dis/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-dis/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-dis/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/llvm-dis/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS bitreader analysis)
-set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvm-dis
   llvm-dis.cpp

Modified: llvm/branches/R600/tools/llvm-extract/llvm-extract.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-extract/llvm-extract.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-extract/llvm-extract.cpp (original)
+++ llvm/branches/R600/tools/llvm-extract/llvm-extract.cpp Tue Nov 13 09:21:47 2012
@@ -59,6 +59,19 @@
                                      "regular expression"),
                    cl::ZeroOrMore, cl::value_desc("rfunction"));
 
+// ExtractAliases - The aliases to extract from the module.
+static cl::list<std::string>
+ExtractAliases("alias", cl::desc("Specify alias to extract"),
+               cl::ZeroOrMore, cl::value_desc("alias"));
+
+
+// ExtractRegExpAliases - The aliases, matched via regular expression, to
+// extract from the module.
+static cl::list<std::string>
+ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a "
+                                        "regular expression"),
+                     cl::ZeroOrMore, cl::value_desc("ralias"));
+
 // ExtractGlobals - The globals to extract from the module.
 static cl::list<std::string>
 ExtractGlobals("glob", cl::desc("Specify global to extract"),
@@ -97,6 +110,40 @@
   // Use SetVector to avoid duplicates.
   SetVector<GlobalValue *> GVs;
 
+  // Figure out which aliases we should extract.
+  for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) {
+    GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]);
+    if (!GA) {
+      errs() << argv[0] << ": program doesn't contain alias named '"
+             << ExtractAliases[i] << "'!\n";
+      return 1;
+    }
+    GVs.insert(GA);
+  }
+
+  // Extract aliases via regular expression matching.
+  for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) {
+    std::string Error;
+    Regex RegEx(ExtractRegExpAliases[i]);
+    if (!RegEx.isValid(Error)) {
+      errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' "
+        "invalid regex: " << Error;
+    }
+    bool match = false;
+    for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end();
+         GA != E; GA++) {
+      if (RegEx.match(GA->getName())) {
+        GVs.insert(&*GA);
+        match = true;
+      }
+    }
+    if (!match) {
+      errs() << argv[0] << ": program doesn't contain alias matching '"
+             << ExtractRegExpAliases[i] << "'!\n";
+      return 1;
+    }
+  }
+
   // Figure out which globals we should extract.
   for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) {
     GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]);
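
The new -ralias handling above follows the usual llvm-extract shape: compile the pattern, walk the module, collect matches, and fail if nothing matched. A reduced sketch of that select-by-regex loop over plain strings (std::regex is used here purely for illustration; the tool itself uses llvm::Regex):

#include <cstdio>
#include <regex>
#include <string>
#include <vector>

// Copy every name matching Pattern into Out; diagnose and return false if
// nothing matched, mirroring the "program doesn't contain ..." errors above.
static bool selectByRegex(const std::vector<std::string> &Names,
                          const std::string &Pattern,
                          std::vector<std::string> &Out) {
  std::regex Re(Pattern);
  bool Matched = false;
  for (size_t i = 0, e = Names.size(); i != e; ++i)
    if (std::regex_search(Names[i], Re)) {
      Out.push_back(Names[i]);
      Matched = true;
    }
  if (!Matched)
    std::fprintf(stderr, "no name matching '%s'\n", Pattern.c_str());
  return Matched;
}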

Modified: llvm/branches/R600/tools/llvm-mc/llvm-mc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-mc/llvm-mc.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-mc/llvm-mc.cpp (original)
+++ llvm/branches/R600/tools/llvm-mc/llvm-mc.cpp Tue Nov 13 09:21:47 2012
@@ -158,7 +158,8 @@
   AC_AsLex,
   AC_Assemble,
   AC_Disassemble,
-  AC_EDisassemble
+  AC_EDisassemble,
+  AC_MDisassemble
 };
 
 static cl::opt<ActionType>
@@ -172,6 +173,8 @@
                              "Disassemble strings of hex bytes"),
                   clEnumValN(AC_EDisassemble, "edis",
                              "Enhanced disassembly of strings of hex bytes"),
+                  clEnumValN(AC_MDisassemble, "mdis",
+                             "Marked up disassembly of strings of hex bytes"),
                   clEnumValEnd));
 
 static const Target *GetTarget(const char *ProgName) {
@@ -402,8 +405,9 @@
   OwningPtr<MCSubtargetInfo>
     STI(TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
 
+  MCInstPrinter *IP;
   if (FileType == OFT_AssemblyFile) {
-    MCInstPrinter *IP =
+    IP =
       TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *MCII, *MRI, *STI);
     MCCodeEmitter *CE = 0;
     MCAsmBackend *MAB = 0;
@@ -436,6 +440,9 @@
   case AC_Assemble:
     Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI);
     break;
+  case AC_MDisassemble:
+    IP->setUseMarkup(1);
+    // Fall through to do disassembly.
   case AC_Disassemble:
     Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str,
                                     *Buffer, SrcMgr, Out->os());

Added: llvm/branches/R600/tools/llvm-mcmarkup/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-mcmarkup/CMakeLists.txt?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/tools/llvm-mcmarkup/CMakeLists.txt (added)
+++ llvm/branches/R600/tools/llvm-mcmarkup/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_llvm_tool(llvm-mcmarkup
+  llvm-mcmarkup.cpp
+  )

Copied: llvm/branches/R600/tools/llvm-mcmarkup/LLVMBuild.txt (from r167837, llvm/branches/R600/tools/lli/LLVMBuild.txt)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-mcmarkup/LLVMBuild.txt?p2=llvm/branches/R600/tools/llvm-mcmarkup/LLVMBuild.txt&p1=llvm/branches/R600/tools/lli/LLVMBuild.txt&r1=167837&r2=167838&rev=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/LLVMBuild.txt (original)
+++ llvm/branches/R600/tools/llvm-mcmarkup/LLVMBuild.txt Tue Nov 13 09:21:47 2012
@@ -1,4 +1,4 @@
-;===- ./tools/lli/LLVMBuild.txt --------------------------------*- Conf -*--===;
+;===- ./tools/llvm-mcmarkup/LLVMBuild.txt ----------------------*- Conf -*--===;
 ;
 ;                     The LLVM Compiler Infrastructure
 ;
@@ -17,6 +17,6 @@
 
 [component_0]
 type = Tool
-name = lli
+name = llvm-mcmarkup
 parent = Tools
-required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG
+required_libraries = Support

Copied: llvm/branches/R600/tools/llvm-mcmarkup/Makefile (from r167837, llvm/branches/R600/tools/llvm-ar/Makefile)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-mcmarkup/Makefile?p2=llvm/branches/R600/tools/llvm-mcmarkup/Makefile&p1=llvm/branches/R600/tools/llvm-ar/Makefile&r1=167837&r2=167838&rev=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-ar/Makefile (original)
+++ llvm/branches/R600/tools/llvm-mcmarkup/Makefile Tue Nov 13 09:21:47 2012
@@ -1,18 +1,17 @@
-##===- tools/llvm-ar/Makefile ------------------------------*- Makefile -*-===##
-# 
+##===- tools/llvm-mcmarkup/Makefile ------------------------*- Makefile -*-===##
+#
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
-# 
+#
 ##===----------------------------------------------------------------------===##
 
 LEVEL := ../..
-TOOLNAME := llvm-ar
-LINK_COMPONENTS := archive
-REQUIRES_EH := 1
+TOOLNAME := llvm-mcmarkup
+LINK_COMPONENTS := support
 
 # This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS := 1
+TOOL_NO_EXPORTS = 1
 
 include $(LEVEL)/Makefile.common

Added: llvm/branches/R600/tools/llvm-mcmarkup/llvm-mcmarkup.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-mcmarkup/llvm-mcmarkup.cpp?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/tools/llvm-mcmarkup/llvm-mcmarkup.cpp (added)
+++ llvm/branches/R600/tools/llvm-mcmarkup/llvm-mcmarkup.cpp Tue Nov 13 09:21:47 2012
@@ -0,0 +1,225 @@
+//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Example simple parser implementation for the MC assembly markup language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+static cl::list<std::string>
+       InputFilenames(cl::Positional, cl::desc("<input files>"),
+                      cl::ZeroOrMore);
+static cl::opt<bool>
+DumpTags("dump-tags", cl::desc("List all tags encountered in input"));
+
+static StringRef ToolName;
+
+/// Trivial lexer for the markup parser. Input is always handled a character
+/// at a time. The lexer just encapsulates EOF and lookahead handling.
+class MarkupLexer {
+  StringRef::const_iterator Start;
+  StringRef::const_iterator CurPtr;
+  StringRef::const_iterator End;
+public:
+  MarkupLexer(StringRef Source)
+    : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
+  // When processing non-markup, input is consumed a character at a time.
+  bool isEOF() { return CurPtr == End; }
+  int getNextChar() {
+    if (CurPtr == End) return EOF;
+    return *CurPtr++;
+  }
+  int peekNextChar() {
+    if (CurPtr == End) return EOF;
+    return *CurPtr;
+  }
+  StringRef::const_iterator getPosition() const { return CurPtr; }
+};
+
+/// A markup tag is a name and a (usually empty) list of modifiers.
+class MarkupTag {
+  StringRef Name;
+  StringRef Modifiers;
+  SMLoc StartLoc;
+public:
+  MarkupTag(StringRef n, StringRef m, SMLoc Loc)
+    : Name(n), Modifiers(m), StartLoc(Loc) {}
+  StringRef getName() const { return Name; }
+  StringRef getModifiers() const { return Modifiers; }
+  SMLoc getLoc() const { return StartLoc; }
+};
+
+/// A simple parser implementation for creating MarkupTags from input text.
+class MarkupParser {
+  MarkupLexer &Lex;
+  SourceMgr &SM;
+public:
+  MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
+  /// Create a MarkupTag from the current position in the MarkupLexer.
+  /// The parseTag() method should be called when the lexer has processed
+  /// the opening '<' character. Input will be consumed up to and including
+  /// the ':' which terminates the tag open.
+  MarkupTag parseTag();
+  /// Issue a diagnostic and terminate program execution.
+  void FatalError(SMLoc Loc, StringRef Msg);
+};
+
+void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
+  SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+  exit(1);
+}
+
+// Example handler for when a tag is recognized.
+static void processStartTag(MarkupTag &Tag) {
+  // If we're just printing the tags, do that, otherwise do some simple
+  // colorization.
+  if (DumpTags) {
+    outs() << Tag.getName();
+    if (Tag.getModifiers().size())
+      outs() << " " << Tag.getModifiers();
+    outs() << "\n";
+    return;
+  }
+
+  if (!outs().has_colors())
+    return;
+  // Color registers as red and immediates as cyan. Those don't have nested
+  // tags, so don't bother keeping a stack of colors to reset to.
+  if (Tag.getName() == "reg")
+    outs().changeColor(raw_ostream::RED);
+  else if (Tag.getName() == "imm")
+    outs().changeColor(raw_ostream::CYAN);
+}
+
+// Example handler for when the end of a tag is recognized.
+static void processEndTag(MarkupTag &Tag) {
+  // If we're printing the tags, there's nothing more to do here. Otherwise,
+  // set the color back to normal.
+  if (DumpTags)
+    return;
+  if (!outs().has_colors())
+    return;
+  // Just reset to basic white.
+  outs().changeColor(raw_ostream::WHITE, false);
+}
+
+MarkupTag MarkupParser::parseTag() {
+  // First off, extract the tag into its own StringRef so we can look at it
+  // outside of the context of consuming input.
+  StringRef::const_iterator Start = Lex.getPosition();
+  SMLoc Loc = SMLoc::getFromPointer(Start - 1);
+  while(Lex.getNextChar() != ':') {
+    // EOF is an error.
+    if (Lex.isEOF())
+      FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
+  }
+  StringRef RawTag(Start, Lex.getPosition() - Start - 1);
+  std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
+  return MarkupTag(SplitTag.first, SplitTag.second, Loc);
+}
+
+static void parseMCMarkup(StringRef Filename) {
+  OwningPtr<MemoryBuffer> BufferPtr;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) {
+    errs() << ToolName << ": " << ec.message() << '\n';
+    return;
+  }
+  MemoryBuffer *Buffer = BufferPtr.take();
+
+  SourceMgr SrcMgr;
+
+  // Tell SrcMgr about this buffer, which is what the parser will pick up.
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+  StringRef InputSource = Buffer->getBuffer();
+  MarkupLexer Lex(InputSource);
+  MarkupParser Parser(Lex, SrcMgr);
+
+  SmallVector<MarkupTag, 4> TagStack;
+
+  for (int CurChar = Lex.getNextChar();
+       CurChar != EOF;
+       CurChar = Lex.getNextChar()) {
+    switch (CurChar) {
+    case '<': {
+      // A "<<" is output as a literal '<' and does not start a markup tag.
+      if (Lex.peekNextChar() == '<') {
+        (void)Lex.getNextChar();
+        break;
+      }
+      // Parse the markup entry.
+      TagStack.push_back(Parser.parseTag());
+
+      // Do any special handling for the start of a tag.
+      processStartTag(TagStack.back());
+      continue;
+    }
+    case '>': {
+      SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
+      // A ">>" is output as a literal '>' and does not end a markup tag.
+      if (Lex.peekNextChar() == '>') {
+        (void)Lex.getNextChar();
+        break;
+      }
+      // Close out the innermost tag.
+      if (TagStack.empty())
+        Parser.FatalError(Loc, "'>' without matching '<'");
+
+      // Do any special handling for the end of a tag.
+      processEndTag(TagStack.back());
+
+      TagStack.pop_back();
+      continue;
+    }
+    default:
+      break;
+    }
+    // For anything else, just echo the character back out.
+    if (!DumpTags && CurChar != EOF)
+      outs() << (char)CurChar;
+  }
+
+  // If there are any unterminated markup tags, issue diagnostics for them.
+  while (!TagStack.empty()) {
+    MarkupTag &Tag = TagStack.back();
+    SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
+                        "unterminated markup tag");
+    TagStack.pop_back();
+  }
+}
+
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+  cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
+
+  ToolName = argv[0];
+
+  // If no input files specified, read from stdin.
+  if (InputFilenames.size() == 0)
+    InputFilenames.push_back("-");
+
+  std::for_each(InputFilenames.begin(), InputFilenames.end(),
+                parseMCMarkup);
+  return 0;
+}
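
To make the escaping rules in parseMCMarkup() concrete: "<<" and ">>" are emitted as literal '<' and '>', a tag runs from '<' up to the ':' that opens it, and a bare '>' closes the innermost tag. A standalone sketch of the plain-text echo path (no colors, no diagnostics; a hypothetical helper, not part of the tool):

#include <string>

// Strip markup, keeping only the plain text:
//   "push <reg:r11>"  ->  "push r11"
//   "a << b"          ->  "a < b"
static std::string stripMarkup(const std::string &In) {
  std::string Out;
  for (size_t i = 0, e = In.size(); i < e; ++i) {
    char C = In[i];
    if (C == '<') {
      if (i + 1 < e && In[i + 1] == '<') { Out += '<'; ++i; continue; }
      // Tag open: swallow everything up to and including the ':'.
      while (i < e && In[i] != ':') ++i;
      continue;
    }
    if (C == '>') {
      if (i + 1 < e && In[i + 1] == '>') { Out += '>'; ++i; continue; }
      // A bare '>' closes the innermost tag; nothing is emitted for it.
      continue;
    }
    Out += C;
  }
  return Out;
}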

Modified: llvm/branches/R600/tools/llvm-ranlib/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-ranlib/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-ranlib/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/llvm-ranlib/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS archive)
-set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvm-ranlib
   llvm-ranlib.cpp

Modified: llvm/branches/R600/tools/llvm-ranlib/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-ranlib/Makefile?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-ranlib/Makefile (original)
+++ llvm/branches/R600/tools/llvm-ranlib/Makefile Tue Nov 13 09:21:47 2012
@@ -10,7 +10,6 @@
 LEVEL := ../..
 TOOLNAME := llvm-ranlib
 LINK_COMPONENTS := archive
-REQUIRES_EH := 1
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS := 1

Modified: llvm/branches/R600/tools/llvm-ranlib/llvm-ranlib.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-ranlib/llvm-ranlib.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-ranlib/llvm-ranlib.cpp (original)
+++ llvm/branches/R600/tools/llvm-ranlib/llvm-ranlib.cpp Tue Nov 13 09:21:47 2012
@@ -61,41 +61,38 @@
 
   int exitCode = 0;
 
-  // Make sure we don't exit with "unhandled exception".
-  try {
+  // Check the path name of the archive
+  sys::Path ArchivePath;
+  if (!ArchivePath.set(ArchiveName)) {
+    errs() << argv[0] << ": " << "Archive name invalid: " << ArchiveName <<
+      "\n";
+    return 1;
+  }
+
+  // Make sure it exists, we don't create empty archives
+  bool Exists;
+  if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists) {
+    errs() << argv[0] << ": " << "Archive file does not exist: " <<
+      ArchivePath.str() << "\n";
+    return 1;
+  }
 
-    // Check the path name of the archive
-    sys::Path ArchivePath;
-    if (!ArchivePath.set(ArchiveName))
-      throw std::string("Archive name invalid: ") + ArchiveName;
-
-    // Make sure it exists, we don't create empty archives
-    bool Exists;
-    if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists)
-      throw std::string("Archive file does not exist");
-
-    std::string err_msg;
-    std::auto_ptr<Archive>
-      AutoArchive(Archive::OpenAndLoad(ArchivePath, Context, &err_msg));
-    Archive* TheArchive = AutoArchive.get();
-    if (!TheArchive)
-      throw err_msg;
-
-    if (TheArchive->writeToDisk(true, false, &err_msg ))
-      throw err_msg;
-
-    if (Verbose)
-      printSymbolTable(TheArchive);
-
-  } catch (const char* msg) {
-    errs() << argv[0] << ": " << msg << "\n\n";
-    exitCode = 1;
-  } catch (const std::string& msg) {
-    errs() << argv[0] << ": " << msg << "\n";
-    exitCode = 2;
-  } catch (...) {
-    errs() << argv[0] << ": An unexpected unknown exception occurred.\n";
-    exitCode = 3;
+  std::string err_msg;
+  std::auto_ptr<Archive>
+    AutoArchive(Archive::OpenAndLoad(ArchivePath, Context, &err_msg));
+  Archive* TheArchive = AutoArchive.get();
+  if (!TheArchive) {
+    errs() << argv[0] << ": " << err_msg << "\n";
+    return 1;
   }
+
+  if (TheArchive->writeToDisk(true, false, &err_msg)) {
+    errs() << argv[0] << ": " << err_msg << "\n";
+    return 1;
+  }
+
+  if (Verbose)
+    printSymbolTable(TheArchive);
+
   return exitCode;
 }
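
The llvm-ranlib rewrite above, together with dropping LLVM_REQUIRES_EH and REQUIRES_EH from its CMakeLists.txt and Makefile, follows the pattern used throughout this commit: replace throw/catch with an immediate diagnostic and an error return so the tool no longer needs exception support. A generic sketch of the before/after shape, with a hypothetical checkArchive() helper standing in for the real validation calls:

  #include <cstdio>
  #include <string>

  // Hypothetical helper: returns an error message on failure, "" on success.
  static std::string checkArchive(const std::string &Path) {
    return Path.empty() ? "Archive name invalid" : "";
  }

  // Before: exception-based control flow (requires EH to be compiled in).
  //   try {
  //     std::string Err = checkArchive(Path);
  //     if (!Err.empty()) throw Err;
  //     ...
  //   } catch (const std::string &Msg) {
  //     std::fprintf(stderr, "%s: %s\n", Tool, Msg.c_str());
  //     return 1;
  //   }

  // After: report and return at each failure point; no EH needed.
  static int run(const char *Tool, const std::string &Path) {
    std::string Err = checkArchive(Path);
    if (!Err.empty()) {
      std::fprintf(stderr, "%s: %s\n", Tool, Err.c_str());
      return 1;
    }
    // ... open the archive, write the symbol table, etc. ...
    return 0;
  }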

Modified: llvm/branches/R600/tools/lto/lto.exports
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lto/lto.exports?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/lto/lto.exports (original)
+++ llvm/branches/R600/tools/lto/lto.exports Tue Nov 13 09:21:47 2012
@@ -30,3 +30,4 @@
 LLVMCreateDisasm
 LLVMDisasmDispose
 LLVMDisasmInstruction
+LLVMSetDisasmOptions

Modified: llvm/branches/R600/tools/opt/opt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/opt/opt.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/tools/opt/opt.cpp (original)
+++ llvm/branches/R600/tools/opt/opt.cpp Tue Nov 13 09:21:47 2012
@@ -39,6 +39,7 @@
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/LinkAllPasses.h"
 #include "llvm/LinkAllVMCore.h"
@@ -564,6 +565,9 @@
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
   LLVMContext &Context = getGlobalContext();
 
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+
   // Initialize passes
   PassRegistry &Registry = *PassRegistry::getPassRegistry();
   initializeCore(Registry);

Modified: llvm/branches/R600/unittests/ADT/APFloatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ADT/APFloatTest.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ADT/APFloatTest.cpp (original)
+++ llvm/branches/R600/unittests/ADT/APFloatTest.cpp Tue Nov 13 09:21:47 2012
@@ -737,4 +737,40 @@
   EXPECT_EQ(4294967295.0, test.convertToDouble());
   EXPECT_FALSE(losesInfo);
 }
+
+TEST(APFloatTest, PPCDoubleDouble) {
+  APFloat test(APFloat::PPCDoubleDouble, "1.0");
+  EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]);
+  EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]);
+
+  test.divide(APFloat(APFloat::PPCDoubleDouble, "3.0"), APFloat::rmNearestTiesToEven);
+  EXPECT_EQ(0x3fd5555555555555ull, test.bitcastToAPInt().getRawData()[0]);
+  EXPECT_EQ(0x3c75555555555556ull, test.bitcastToAPInt().getRawData()[1]);
+
+  // LDBL_MAX
+  test = APFloat(APFloat::PPCDoubleDouble, "1.79769313486231580793728971405301e+308");
+  EXPECT_EQ(0x7fefffffffffffffull, test.bitcastToAPInt().getRawData()[0]);
+  EXPECT_EQ(0x7c8ffffffffffffeull, test.bitcastToAPInt().getRawData()[1]);
+
+  // LDBL_MIN
+  test = APFloat(APFloat::PPCDoubleDouble, "2.00416836000897277799610805135016e-292");
+  EXPECT_EQ(0x0360000000000000ull, test.bitcastToAPInt().getRawData()[0]);
+  EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]);
+
+  test = APFloat(APFloat::PPCDoubleDouble, "1.0");
+  test.add(APFloat(APFloat::PPCDoubleDouble, "0x1p-105"), APFloat::rmNearestTiesToEven);
+  EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]);
+  EXPECT_EQ(0x3960000000000000ull, test.bitcastToAPInt().getRawData()[1]);
+
+  test = APFloat(APFloat::PPCDoubleDouble, "1.0");
+  test.add(APFloat(APFloat::PPCDoubleDouble, "0x1p-106"), APFloat::rmNearestTiesToEven);
+  EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]);
+#if 0 // XFAIL
+  // This is what we would expect with a true double-double implementation
+  EXPECT_EQ(0x3950000000000000ull, test.bitcastToAPInt().getRawData()[1]);
+#else
+  // This is what we get with our 106-bit mantissa approximation
+  EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]);
+#endif
+}
 }
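
For readers decoding the new PPCDoubleDouble expectations: a double-double value is stored as a pair of IEEE doubles whose exact, unrounded sum is the represented number, with the first word being the double closest to the value and the second a small correction. That is why the test compares the two raw 64-bit words separately. A short sketch, independent of APFloat, that decodes the 1.0/3.0 words used above (fromBits is an illustrative helper assuming a memcpy-based bit reinterpretation):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Reinterpret a raw IEEE-754 bit pattern as a double.
  static double fromBits(uint64_t Bits) {
    double D;
    std::memcpy(&D, &Bits, sizeof(D));
    return D;
  }

  int main() {
    // The two words the test expects for 1.0 / 3.0 in PPCDoubleDouble.
    double Hi = fromBits(0x3fd5555555555555ULL); // closest double to 1/3
    double Lo = fromBits(0x3c75555555555556ULL); // correction, roughly 1.9e-17
    // The represented value is the exact sum Hi + Lo; re-adding the two in
    // double precision simply rounds back to Hi, so the words have to be
    // checked individually.
    std::printf("hi = %.17g\nlo = %g\n", Hi, Lo);
    return 0;
  }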

Modified: llvm/branches/R600/unittests/ADT/DenseMapTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ADT/DenseMapTest.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ADT/DenseMapTest.cpp (original)
+++ llvm/branches/R600/unittests/ADT/DenseMapTest.cpp Tue Nov 13 09:21:47 2012
@@ -330,4 +330,37 @@
   EXPECT_TRUE(map.find_as("d") == map.end());
 }
 
+struct ContiguousDenseMapInfo {
+  static inline unsigned getEmptyKey() { return ~0; }
+  static inline unsigned getTombstoneKey() { return ~0U - 1; }
+  static unsigned getHashValue(const unsigned& Val) { return Val; }
+  static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
+    return LHS == RHS;
+  }
+};
+
+// Test that filling a small dense map with exactly the number of elements in
+// the map grows to have enough space for an empty bucket.
+TEST(DenseMapCustomTest, SmallDenseMapGrowTest) {
+  SmallDenseMap<unsigned, unsigned, 32, ContiguousDenseMapInfo> map;
+  // Add some number of elements, then delete a few to leave us some tombstones.
+  // If we just filled the map with 32 elements we'd grow because of not enough
+  // tombstones which masks the issue here.
+  for (unsigned i = 0; i < 20; ++i)
+    map[i] = i + 1;
+  for (unsigned i = 0; i < 10; ++i)
+    map.erase(i);
+  for (unsigned i = 20; i < 32; ++i)
+    map[i] = i + 1;
+
+  // Size tests
+  EXPECT_EQ(22u, map.size());
+
+  // Try to find an element which doesn't exist.  There was a bug in
+  // SmallDenseMap which led to a map with num elements == small capacity not
+  // having an empty bucket any more.  Finding an element not in the map would
+  // therefore never terminate.
+  EXPECT_TRUE(map.find(32) == map.end());
+}
+
 }
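
The SmallDenseMapGrowTest regression hinges on a property of open-addressed hash tables: a lookup for a missing key only terminates when the probe sequence reaches a bucket that has never been used, so a table containing nothing but live entries and tombstones can spin forever. The sketch below is not SmallDenseMap's actual code, just an illustration of the invariant the grow logic has to preserve (at least one empty bucket):

  #include <vector>

  enum BucketState { Empty, Tombstone, Full };

  struct Bucket {
    BucketState State = Empty;
    unsigned Key = 0;
  };

  // Linear-probing lookup: returns the index of the bucket holding Key, or -1.
  // If every bucket were Full or Tombstone, a lookup for a missing key would
  // never hit the Empty case and would loop forever.
  static int find(const std::vector<Bucket> &Buckets, unsigned Key) {
    size_t N = Buckets.size();
    for (size_t Probe = Key % N; ; Probe = (Probe + 1) % N) {
      const Bucket &B = Buckets[Probe];
      if (B.State == Full && B.Key == Key)
        return static_cast<int>(Probe);
      if (B.State == Empty) // the only way a miss terminates
        return -1;
      // Tombstone, or a different live key: keep probing.
    }
  }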

Modified: llvm/branches/R600/unittests/ExecutionEngine/JIT/JITTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ExecutionEngine/JIT/JITTest.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ExecutionEngine/JIT/JITTest.cpp (original)
+++ llvm/branches/R600/unittests/ExecutionEngine/JIT/JITTest.cpp Tue Nov 13 09:21:47 2012
@@ -224,8 +224,8 @@
   OwningPtr<ExecutionEngine> TheJIT;
 };
 
-// Tests on ARM disabled as we're running the old jit
-#if !defined(__arm__)
+// Tests on ARM and PowerPC disabled as we're running the old jit
+#if !defined(__arm__) && !defined(__powerpc__)
 
 // Regression test for a bug.  The JIT used to allocate globals inside the same
 // memory block used for the function, and when the function code was freed,
@@ -295,14 +295,14 @@
   EXPECT_EQ(3, *GPtr);
 }
 
-#endif // !defined(__arm__)
+#endif // !defined(__arm__) && !defined(__powerpc__)
 
 int PlusOne(int arg) {
   return arg + 1;
 }
 
-// ARM tests disabled pending fix for PR10783.
-#if !defined(__arm__)
+// ARM and PowerPC tests disabled pending fix for PR10783.
+#if !defined(__arm__) && !defined(__powerpc__)
 TEST_F(JITTest, FarCallToKnownFunction) {
   // x86-64 can only make direct calls to functions within 32 bits of
   // the current PC.  To call anything farther away, we have to load
@@ -480,7 +480,7 @@
   EXPECT_EQ(RJMM->startExceptionTableCalls.size(),
             NumTablesDeallocated);
 }
-#endif // !defined(__arm__)
+#endif // !defined(__arm__) && !defined(__powerpc__)
 
 // ARM, MIPS and PPC still emit stubs for calls since the target may be
 // too far away to call directly.  This #if can probably be removed when
@@ -526,8 +526,8 @@
 }
 #endif  // !ARM && !PPC
 
-// Tests on ARM disabled as we're running the old jit
-#if !defined(__arm__)
+// Tests on ARM and PowerPC disabled as we're running the old jit
+#if !defined(__arm__) && !defined(__powerpc__)
 
 TEST_F(JITTest, FunctionPointersOutliveTheirCreator) {
   TheJIT->DisableLazyCompilation(true);
@@ -563,12 +563,13 @@
 #endif
 }
 
-#endif //!defined(__arm__)
+#endif //!defined(__arm__) && !defined(__powerpc__)
 
-// ARM does not have an implementation
+// Tests on ARM and PowerPC disabled as we're running the old jit
+// In addition, ARM does not have an implementation
 // of replaceMachineCodeForFunction(), so recompileAndRelinkFunction
 // doesn't work.
-#if !defined(__arm__)
+#if !defined(__arm__) && !defined(__powerpc__)
 TEST_F(JITTest, FunctionIsRecompiledAndRelinked) {
   Function *F = Function::Create(TypeBuilder<int(void), false>::get(Context),
                                  GlobalValue::ExternalLinkage, "test", M);
@@ -599,7 +600,7 @@
   EXPECT_EQ(2, OrigFPtr())
     << "The old pointer's target should now jump to the new version";
 }
-#endif  // !defined(__arm__)
+#endif  // !defined(__arm__) && !defined(__powerpc__)
 
 }  // anonymous namespace
 // This variable is intentionally defined differently in the statically-compiled
@@ -609,8 +610,8 @@
 int32_t JITTest_AvailableExternallyGlobal LLVM_ATTRIBUTE_USED = 42;
 namespace {
 
-// Tests on ARM disabled as we're running the old jit
-#if !defined(__arm__)
+// Tests on ARM and PowerPC disabled as we're running the old jit
+#if !defined(__arm__) && !defined(__powerpc__)
 
 TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) {
   TheJIT->DisableLazyCompilation(true);
@@ -628,7 +629,7 @@
   EXPECT_EQ(42, loader()) << "func should return 42 from the external global,"
                           << " not 7 from the IR version.";
 }
-#endif //!defined(__arm__)
+#endif //!defined(__arm__) && !defined(__powerpc__)
 }  // anonymous namespace
 // This function is intentionally defined differently in the statically-compiled
 // program from the IR input to the JIT to assert that the JIT doesn't use its
@@ -639,8 +640,8 @@
 }
 namespace {
 
-// ARM tests disabled pending fix for PR10783.
-#if !defined(__arm__)
+// ARM and PowerPC tests disabled pending fix for PR10783.
+#if !defined(__arm__) && !defined(__powerpc__)
 TEST_F(JITTest, AvailableExternallyFunctionIsntCompiled) {
   TheJIT->DisableLazyCompilation(true);
   LoadAssembly("define available_externally i32 "
@@ -796,7 +797,7 @@
     (intptr_t)TheJIT->getPointerToFunction(recur1IR));
   EXPECT_EQ(3, recur1(4));
 }
-#endif // !defined(__arm__)
+#endif // !defined(__arm__) && !defined(__powerpc__)
 
 // This code is copied from JITEventListenerTest, but it only runs once for all
 // the tests in this directory.  Everything seems fine, but that's strange

Modified: llvm/branches/R600/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ExecutionEngine/JIT/MultiJITTest.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ExecutionEngine/JIT/MultiJITTest.cpp (original)
+++ llvm/branches/R600/unittests/ExecutionEngine/JIT/MultiJITTest.cpp Tue Nov 13 09:21:47 2012
@@ -65,8 +65,8 @@
   FooF2 = M2->getFunction("foo2");
 }
 
-// ARM tests disabled pending fix for PR10783.
-#if !defined(__arm__)
+// ARM and PowerPC tests disabled pending fix for PR10783.
+#if !defined(__arm__) && !defined(__powerpc__)
 
 TEST(MultiJitTest, EagerMode) {
   LLVMContext Context1;
@@ -176,6 +176,6 @@
 #endif
   EXPECT_TRUE(sa == fa);
 }
-#endif  // !defined(__arm__)
+#endif  // !defined(__arm__) && !defined(__powerpc__)
 
 }  // anonymous namespace

Modified: llvm/branches/R600/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp (original)
+++ llvm/branches/R600/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp Tue Nov 13 09:21:47 2012
@@ -47,6 +47,7 @@
   GlobalValue *Global = insertGlobalInt32(M.get(), "test_global", initialValue);
   createJIT(M.take());
   void *globalPtr =  TheJIT->getPointerToGlobal(Global);
+  static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != globalPtr)
     << "Unable to get pointer to global value from JIT";
 
@@ -60,6 +61,7 @@
   Function *F = insertAddFunction(M.get());
   createJIT(M.take());
   void *addPtr = TheJIT->getPointerToFunction(F);
+  static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != addPtr)
     << "Unable to get pointer to function from JIT";
 
@@ -76,6 +78,7 @@
   Function *Main = insertMainFunction(M.get(), 6);
   createJIT(M.take());
   void *vPtr = TheJIT->getPointerToFunction(Main);
+  static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != vPtr)
     << "Unable to get pointer to main() from JIT";
 
@@ -97,6 +100,7 @@
 
   createJIT(M.take());
   void *rgvPtr = TheJIT->getPointerToFunction(ReturnGlobal);
+  static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != rgvPtr);
 
   int32_t(*FuncPtr)(void) = (int32_t(*)(void))(intptr_t)rgvPtr;
@@ -165,6 +169,7 @@
 
   createJIT(M.take());
   void *vPtr = TheJIT->getPointerToFunction(Outer);
+  static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != vPtr)
     << "Unable to get pointer to outer function from JIT";
 

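The invalidateInstructionCache() calls added to these tests matter on targets whose instruction and data caches are not kept coherent by hardware (ARM and PowerPC in particular): freshly generated code is written through the data cache, and the CPU may otherwise keep executing stale instruction-cache contents. Outside the unit-test memory manager the same idea can be expressed with the GCC/Clang builtin; a hedged sketch (makeCodeVisible is an illustrative name), assuming the code has already been copied into an executable buffer:

  #include <cstddef>
  #include <cstdint>

  // After emitting JITed code into [Start, Start + Size), make sure the CPU
  // will fetch the new bytes rather than stale instruction-cache contents.
  static void makeCodeVisible(uint8_t *Start, size_t Size) {
  #if defined(__GNUC__) || defined(__clang__)
    __builtin___clear_cache(reinterpret_cast<char *>(Start),
                            reinterpret_cast<char *>(Start + Size));
  #endif
    // Effectively a no-op on x86; required on ARM/PowerPC before casting
    // Start to a function pointer and calling it.
  }
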
Modified: llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp (original)
+++ llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp Tue Nov 13 09:21:47 2012
@@ -14,6 +14,7 @@
 
 #include "llvm/Config/config.h"
 #include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MathExtras.h"
 
 #include "SectionMemoryManager.h"
 
@@ -34,9 +35,16 @@
                                                     unsigned SectionID) {
   if (!Alignment)
     Alignment = 16;
-  uint8_t *Addr = (uint8_t*)calloc((Size + Alignment - 1)/Alignment, Alignment);
-  AllocatedDataMem.push_back(sys::MemoryBlock(Addr, Size));
-  return Addr;
+  // Ensure that enough memory is requested to allow aligning.
+  size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment;
+  uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment);
+
+  // Honour the alignment requirement.
+  uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment);
+
+  // Store the original address from calloc so we can free it later.
+  AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment));
+  return AlignedAddr;
 }
 
 uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size,
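
The allocateDataSection fix uses a standard over-allocate-and-round-up trick: calloc guarantees no stronger alignment than malloc does, so the code requests one extra Alignment's worth of bytes, rounds the returned pointer up (RoundUpToAlignment from MathExtras.h is essentially the expression shown below), and remembers the original pointer so the later free() stays valid. A self-contained sketch of the same idea, with illustrative names:

  #include <cstdint>
  #include <cstdlib>

  struct AlignedAlloc {
    void *Raw;    // what calloc returned; this is what must be passed to free()
    uint8_t *Ptr; // aligned pointer to hand out to the caller
  };

  // Allocate Size zeroed bytes aligned to Alignment (Alignment must be nonzero;
  // the arithmetic does not require it to be a power of two).
  static AlignedAlloc allocateAligned(size_t Size, size_t Alignment) {
    // One extra Alignment-sized element guarantees room to slide forward.
    size_t NumElements = 1 + (Size + Alignment - 1) / Alignment;
    void *Raw = std::calloc(NumElements, Alignment);
    uintptr_t Addr = reinterpret_cast<uintptr_t>(Raw);
    uintptr_t Aligned = ((Addr + Alignment - 1) / Alignment) * Alignment;
    AlignedAlloc Result = { Raw, reinterpret_cast<uint8_t *>(Aligned) };
    return Result;
  }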

Modified: llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h (original)
+++ llvm/branches/R600/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h Tue Nov 13 09:21:47 2012
@@ -52,6 +52,7 @@
   SmallVector<sys::MemoryBlock, 16> FreeCodeMem;
 
 public:
+
   ///
   /// Functions below are not used by MCJIT, but must be implemented because
   /// they are declared as pure virtuals in the base class.

Modified: llvm/branches/R600/unittests/VMCore/IRBuilderTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/VMCore/IRBuilderTest.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/VMCore/IRBuilderTest.cpp (original)
+++ llvm/branches/R600/unittests/VMCore/IRBuilderTest.cpp Tue Nov 13 09:21:47 2012
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/BasicBlock.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/IntrinsicInst.h"
@@ -96,4 +97,15 @@
   EXPECT_EQ(Weights, TI->getMetadata(LLVMContext::MD_prof));
 }
 
+TEST_F(IRBuilderTest, GetIntTy) {
+  IRBuilder<> Builder(BB);
+  IntegerType *Ty1 = Builder.getInt1Ty();
+  EXPECT_EQ(Ty1, IntegerType::get(getGlobalContext(), 1));
+
+  DataLayout* DL = new DataLayout(M.get());
+  IntegerType *IntPtrTy = Builder.getIntPtrTy(DL);
+  unsigned IntPtrBitSize =  DL->getPointerSizeInBits(0);
+  EXPECT_EQ(IntPtrTy, IntegerType::get(getGlobalContext(), IntPtrBitSize));
+}
+
 }

Modified: llvm/branches/R600/unittests/VMCore/InstructionsTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/VMCore/InstructionsTest.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/unittests/VMCore/InstructionsTest.cpp (original)
+++ llvm/branches/R600/unittests/VMCore/InstructionsTest.cpp Tue Nov 13 09:21:47 2012
@@ -243,5 +243,42 @@
   delete I;
 }
 
+
+TEST(InstructionsTest, isEliminableCastPair) {
+  LLVMContext &C(getGlobalContext());
+
+  Type* Int32Ty = Type::getInt32Ty(C);
+  Type* Int64Ty = Type::getInt64Ty(C);
+  Type* Int64PtrTy = Type::getInt64PtrTy(C);
+
+  // Source and destination pointers have same size -> bitcast.
+  EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt,
+                                           CastInst::IntToPtr,
+                                           Int64PtrTy, Int64Ty, Int64PtrTy,
+                                           Int32Ty, 0, Int32Ty),
+            CastInst::BitCast);
+
+  // Source and destination pointers have different sizes -> fail.
+  EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt,
+                                           CastInst::IntToPtr,
+                                           Int64PtrTy, Int64Ty, Int64PtrTy,
+                                           Int32Ty, 0, Int64Ty),
+            0U);
+
+  // Middle pointer big enough -> bitcast.
+  EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr,
+                                           CastInst::PtrToInt,
+                                           Int64Ty, Int64PtrTy, Int64Ty,
+                                           0, Int64Ty, 0),
+            CastInst::BitCast);
+
+  // Middle pointer too small -> fail.
+  EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr,
+                                           CastInst::PtrToInt,
+                                           Int64Ty, Int64PtrTy, Int64Ty,
+                                           0, Int32Ty, 0),
+            0U);
+}
+
 }  // end anonymous namespace
 }  // end namespace llvm

Modified: llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -689,18 +689,18 @@
   // Split via the '='.
   std::pair<StringRef, StringRef> Ops = S.split('=');
   if (Ops.second == "")
-    throw TGError(Loc, "missing '=' in two-operand alias constraint");
+    PrintFatalError(Loc, "missing '=' in two-operand alias constraint");
   // Trim whitespace and the leading '$' on the operand names.
   size_t start = Ops.first.find_first_of('$');
   if (start == std::string::npos)
-    throw TGError(Loc, "expected '$' prefix on asm operand name");
+    PrintFatalError(Loc, "expected '$' prefix on asm operand name");
   Ops.first = Ops.first.slice(start + 1, std::string::npos);
   size_t end = Ops.first.find_last_of(" \t");
   Ops.first = Ops.first.slice(0, end);
   // Now the second operand.
   start = Ops.second.find_first_of('$');
   if (start == std::string::npos)
-    throw TGError(Loc, "expected '$' prefix on asm operand name");
+    PrintFatalError(Loc, "expected '$' prefix on asm operand name");
   Ops.second = Ops.second.slice(start + 1, std::string::npos);
   end = Ops.second.find_last_of(" \t");
   Ops.first = Ops.first.slice(0, end);
@@ -716,11 +716,11 @@
   int SrcAsmOperand = findAsmOperandNamed(Ops.first);
   int DstAsmOperand = findAsmOperandNamed(Ops.second);
   if (SrcAsmOperand == -1)
-    throw TGError(TheDef->getLoc(),
+    PrintFatalError(TheDef->getLoc(),
                   "unknown source two-operand alias operand '" +
                   Ops.first.str() + "'.");
   if (DstAsmOperand == -1)
-    throw TGError(TheDef->getLoc(),
+    PrintFatalError(TheDef->getLoc(),
                   "unknown destination two-operand alias operand '" +
                   Ops.second.str() + "'.");
 
@@ -852,15 +852,15 @@
   // The first token of the instruction is the mnemonic, which must be a
   // simple string, not a $foo variable or a singleton register.
   if (AsmOperands.empty())
-    throw TGError(TheDef->getLoc(),
+    PrintFatalError(TheDef->getLoc(),
                   "Instruction '" + TheDef->getName() + "' has no tokens");
   Mnemonic = AsmOperands[0].Token;
   if (Mnemonic.empty())
-    throw TGError(TheDef->getLoc(),
+    PrintFatalError(TheDef->getLoc(),
                   "Missing instruction mnemonic");
   // FIXME : Check and raise an error if it is a register.
   if (Mnemonic[0] == '$')
-    throw TGError(TheDef->getLoc(),
+    PrintFatalError(TheDef->getLoc(),
                   "Invalid instruction mnemonic '" + Mnemonic.str() + "'!");
 
   // Remove the first operand, it is tracked in the mnemonic field.
@@ -870,12 +870,12 @@
 bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const {
   // Reject matchables with no .s string.
   if (AsmString.empty())
-    throw TGError(TheDef->getLoc(), "instruction with empty asm string");
+    PrintFatalError(TheDef->getLoc(), "instruction with empty asm string");
 
   // Reject any matchables with a newline in them, they should be marked
   // isCodeGenOnly if they are pseudo instructions.
   if (AsmString.find('\n') != std::string::npos)
-    throw TGError(TheDef->getLoc(),
+    PrintFatalError(TheDef->getLoc(),
                   "multiline instruction is not valid for the asmparser, "
                   "mark it isCodeGenOnly");
 
@@ -883,7 +883,7 @@
   // has one line.
   if (!CommentDelimiter.empty() &&
       StringRef(AsmString).find(CommentDelimiter) != StringRef::npos)
-    throw TGError(TheDef->getLoc(),
+    PrintFatalError(TheDef->getLoc(),
                   "asmstring for instruction has comment character in it, "
                   "mark it isCodeGenOnly");
 
@@ -897,7 +897,7 @@
   for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
     StringRef Tok = AsmOperands[i].Token;
     if (Tok[0] == '$' && Tok.find(':') != StringRef::npos)
-      throw TGError(TheDef->getLoc(),
+      PrintFatalError(TheDef->getLoc(),
                     "matchable with operand modifier '" + Tok.str() +
                     "' not supported by asm matcher.  Mark isCodeGenOnly!");
 
@@ -905,7 +905,7 @@
     // We reject aliases and ignore instructions for now.
     if (Tok[0] == '$' && !OperandNames.insert(Tok).second) {
       if (!Hack)
-        throw TGError(TheDef->getLoc(),
+        PrintFatalError(TheDef->getLoc(),
                       "ERROR: matchable with tied operand '" + Tok.str() +
                       "' can never be matched!");
       // FIXME: Should reject these.  The ARM backend hits this with $lane in a
@@ -1004,8 +1004,8 @@
     // use it, else just fall back to the underlying register class.
     const RecordVal *R = Rec->getValue("ParserMatchClass");
     if (R == 0 || R->getValue() == 0)
-      throw "Record `" + Rec->getName() +
-        "' does not have a ParserMatchClass!\n";
+      PrintFatalError("Record `" + Rec->getName() +
+        "' does not have a ParserMatchClass!\n");
 
     if (DefInit *DI= dyn_cast<DefInit>(R->getValue())) {
       Record *MatchClass = DI->getDef();
@@ -1016,28 +1016,28 @@
     // No custom match class. Just use the register class.
     Record *ClassRec = Rec->getValueAsDef("RegClass");
     if (!ClassRec)
-      throw TGError(Rec->getLoc(), "RegisterOperand `" + Rec->getName() +
+      PrintFatalError(Rec->getLoc(), "RegisterOperand `" + Rec->getName() +
                     "' has no associated register class!\n");
     if (ClassInfo *CI = RegisterClassClasses[ClassRec])
       return CI;
-    throw TGError(Rec->getLoc(), "register class has no class info!");
+    PrintFatalError(Rec->getLoc(), "register class has no class info!");
   }
 
 
   if (Rec->isSubClassOf("RegisterClass")) {
     if (ClassInfo *CI = RegisterClassClasses[Rec])
       return CI;
-    throw TGError(Rec->getLoc(), "register class has no class info!");
+    PrintFatalError(Rec->getLoc(), "register class has no class info!");
   }
 
   if (!Rec->isSubClassOf("Operand"))
-    throw TGError(Rec->getLoc(), "Operand `" + Rec->getName() +
+    PrintFatalError(Rec->getLoc(), "Operand `" + Rec->getName() +
                   "' does not derive from class Operand!\n");
   Record *MatchClass = Rec->getValueAsDef("ParserMatchClass");
   if (ClassInfo *CI = AsmOperandClasses[MatchClass])
     return CI;
 
-  throw TGError(Rec->getLoc(), "operand has no match class!");
+  PrintFatalError(Rec->getLoc(), "operand has no match class!");
 }
 
 void AsmMatcherInfo::
@@ -1287,7 +1287,7 @@
       continue;
 
     if (Pred->getName().empty())
-      throw TGError(Pred->getLoc(), "Predicate has no name!");
+      PrintFatalError(Pred->getLoc(), "Predicate has no name!");
 
     unsigned FeatureNo = SubtargetFeatures.size();
     SubtargetFeatures[Pred] = new SubtargetFeatureInfo(Pred, FeatureNo);
@@ -1468,7 +1468,7 @@
     ClassInfo *FromClass = getTokenClass(Rec->getValueAsString("FromToken"));
     ClassInfo *ToClass = getTokenClass(Rec->getValueAsString("ToToken"));
     if (FromClass == ToClass)
-      throw TGError(Rec->getLoc(),
+      PrintFatalError(Rec->getLoc(),
                     "error: Destination value identical to source value.");
     FromClass->SuperClasses.push_back(ToClass);
   }
@@ -1490,7 +1490,7 @@
   // Map this token to an operand.
   unsigned Idx;
   if (!Operands.hasOperandNamed(OperandName, Idx))
-    throw TGError(II->TheDef->getLoc(), "error: unable to find operand: '" +
+    PrintFatalError(II->TheDef->getLoc(), "error: unable to find operand: '" +
                   OperandName.str() + "'");
 
   // If the instruction operand has multiple suboperands, but the parser
@@ -1561,7 +1561,7 @@
       return;
     }
 
-  throw TGError(II->TheDef->getLoc(), "error: unable to find operand: '" +
+  PrintFatalError(II->TheDef->getLoc(), "error: unable to find operand: '" +
                 OperandName.str() + "'");
 }
 
@@ -1583,7 +1583,7 @@
     // Find out what operand from the asmparser this MCInst operand comes from.
     int SrcOperand = findAsmOperandNamed(OpInfo.Name);
     if (OpInfo.Name.empty() || SrcOperand == -1)
-      throw TGError(TheDef->getLoc(), "Instruction '" +
+      PrintFatalError(TheDef->getLoc(), "Instruction '" +
                     TheDef->getName() + "' has operand '" + OpInfo.Name +
                     "' that doesn't appear in asm string!");
 
@@ -1635,7 +1635,7 @@
         StringRef Name = CGA.ResultOperands[AliasOpNo].getName();
         int SrcOperand = findAsmOperand(Name, SubIdx);
         if (SrcOperand == -1)
-          throw TGError(TheDef->getLoc(), "Instruction '" +
+          PrintFatalError(TheDef->getLoc(), "Instruction '" +
                         TheDef->getName() + "' has operand '" + OpName +
                         "' that doesn't appear in asm string!");
         unsigned NumOperands = (SubIdx == -1 ? OpInfo->MINumOperands : 1);
@@ -2270,7 +2270,7 @@
     SubtargetFeatureInfo *F = Info.getSubtargetFeature(ReqFeatures[i]);
 
     if (F == 0)
-      throw TGError(R->getLoc(), "Predicate '" + ReqFeatures[i]->getName() +
+      PrintFatalError(R->getLoc(), "Predicate '" + ReqFeatures[i]->getName() +
                     "' is not marked as an AssemblerPredicate!");
 
     if (NumFeatures)
@@ -2333,14 +2333,14 @@
           // We can't have two aliases from the same mnemonic with no predicate.
           PrintError(ToVec[AliasWithNoPredicate]->getLoc(),
                      "two MnemonicAliases with the same 'from' mnemonic!");
-          throw TGError(R->getLoc(), "this is the other MnemonicAlias.");
+          PrintFatalError(R->getLoc(), "this is the other MnemonicAlias.");
         }
 
         AliasWithNoPredicate = i;
         continue;
       }
       if (R->getValueAsString("ToMnemonic") == I->first)
-        throw TGError(R->getLoc(), "MnemonicAlias to the same string");
+        PrintFatalError(R->getLoc(), "MnemonicAlias to the same string");
 
       if (!MatchCode.empty())
         MatchCode += "else ";
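
Every TableGen backend touched by this commit applies the same recipe: throw TGError(Loc, Msg) and throw std::string(Msg) become PrintFatalError(Loc, Msg) or PrintFatalError(Msg) from llvm/TableGen/Error.h, which prints the diagnostic and exits, so the generators no longer rely on C++ exceptions. Reduced to its essentials (the real helper also understands SMLoc and SourceMgr), the pattern is a noreturn report-and-exit function; fatalError and checkMnemonic below are hypothetical names, with checkMnemonic mirroring the mnemonic checks in the hunk above:

  #include <cstdio>
  #include <cstdlib>
  #include <string>

  // Minimal stand-in for a PrintFatalError-style helper: report and exit
  // instead of throwing, so callers need neither try/catch nor an EH runtime.
  [[noreturn]] static void fatalError(const std::string &Msg) {
    std::fprintf(stderr, "error: %s\n", Msg.c_str());
    std::exit(1);
  }

  static void checkMnemonic(const std::string &Mnemonic) {
    if (Mnemonic.empty())
      fatalError("Missing instruction mnemonic"); // was: throw TGError(...)
    if (Mnemonic[0] == '$')
      fatalError("Invalid instruction mnemonic '" + Mnemonic + "'!");
  }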

Modified: llvm/branches/R600/utils/TableGen/AsmWriterEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/AsmWriterEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/AsmWriterEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/AsmWriterEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -566,9 +566,9 @@
         std::vector<std::string> AltNames =
           Reg.TheDef->getValueAsListOfStrings("AltNames");
         if (AltNames.size() <= Idx)
-          throw TGError(Reg.TheDef->getLoc(),
-                        (Twine("Register definition missing alt name for '") +
-                        AltName + "'.").str());
+          PrintFatalError(Reg.TheDef->getLoc(),
+            (Twine("Register definition missing alt name for '") +
+             AltName + "'.").str());
         AsmName = AltNames[Idx];
       }
     }

Modified: llvm/branches/R600/utils/TableGen/AsmWriterInst.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/AsmWriterInst.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/AsmWriterInst.cpp (original)
+++ llvm/branches/R600/utils/TableGen/AsmWriterInst.cpp Tue Nov 13 09:21:47 2012
@@ -14,6 +14,7 @@
 #include "AsmWriterInst.h"
 #include "CodeGenTarget.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 
 using namespace llvm;
@@ -123,8 +124,8 @@
                    != std::string::npos) {
           AddLiteralString(std::string(1, AsmString[DollarPos+1]));
         } else {
-          throw "Non-supported escaped character found in instruction '" +
-          CGI.TheDef->getName() + "'!";
+          PrintFatalError("Non-supported escaped character found in instruction '" +
+            CGI.TheDef->getName() + "'!");
         }
         LastEmitted = DollarPos+2;
         continue;
@@ -162,15 +163,15 @@
       // brace.
       if (hasCurlyBraces) {
         if (VarEnd >= AsmString.size())
-          throw "Reached end of string before terminating curly brace in '"
-          + CGI.TheDef->getName() + "'";
+          PrintFatalError("Reached end of string before terminating curly brace in '"
+            + CGI.TheDef->getName() + "'");
         
         // Look for a modifier string.
         if (AsmString[VarEnd] == ':') {
           ++VarEnd;
           if (VarEnd >= AsmString.size())
-            throw "Reached end of string before terminating curly brace in '"
-            + CGI.TheDef->getName() + "'";
+            PrintFatalError("Reached end of string before terminating curly brace in '"
+              + CGI.TheDef->getName() + "'");
           
           unsigned ModifierStart = VarEnd;
           while (VarEnd < AsmString.size() && isIdentChar(AsmString[VarEnd]))
@@ -178,17 +179,17 @@
           Modifier = std::string(AsmString.begin()+ModifierStart,
                                  AsmString.begin()+VarEnd);
           if (Modifier.empty())
-            throw "Bad operand modifier name in '"+ CGI.TheDef->getName() + "'";
+            PrintFatalError("Bad operand modifier name in '"+ CGI.TheDef->getName() + "'");
         }
         
         if (AsmString[VarEnd] != '}')
-          throw "Variable name beginning with '{' did not end with '}' in '"
-          + CGI.TheDef->getName() + "'";
+          PrintFatalError("Variable name beginning with '{' did not end with '}' in '"
+            + CGI.TheDef->getName() + "'");
         ++VarEnd;
       }
       if (VarName.empty() && Modifier.empty())
-        throw "Stray '$' in '" + CGI.TheDef->getName() +
-        "' asm string, maybe you want $$?";
+        PrintFatalError("Stray '$' in '" + CGI.TheDef->getName() +
+          "' asm string, maybe you want $$?");
       
       if (VarName.empty()) {
         // Just a modifier, pass this into PrintSpecial.

Modified: llvm/branches/R600/utils/TableGen/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CMakeLists.txt?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CMakeLists.txt (original)
+++ llvm/branches/R600/utils/TableGen/CMakeLists.txt Tue Nov 13 09:21:47 2012
@@ -1,4 +1,3 @@
-set(LLVM_REQUIRES_EH 1)
 set(LLVM_LINK_COMPONENTS Support)
 
 add_tablegen(llvm-tblgen LLVM
@@ -9,6 +8,7 @@
   CodeEmitterGen.cpp
   CodeGenDAGPatterns.cpp
   CodeGenInstruction.cpp
+  CodeGenMapTable.cpp
   CodeGenRegisters.cpp
   CodeGenSchedule.cpp
   CodeGenTarget.cpp

Modified: llvm/branches/R600/utils/TableGen/CallingConvEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CallingConvEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CallingConvEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CallingConvEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenTarget.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include <cassert>
@@ -93,7 +94,7 @@
       O << Action->getValueAsString("Predicate");
     } else {
       Action->dump();
-      throw "Unknown CCPredicateAction!";
+      PrintFatalError("Unknown CCPredicateAction!");
     }
     
     O << ") {\n";
@@ -131,7 +132,7 @@
       ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList");
       if (ShadowRegList->getSize() >0 &&
           ShadowRegList->getSize() != RegList->getSize())
-        throw "Invalid length of list of shadowed registers";
+        PrintFatalError("Invalid length of list of shadowed registers");
 
       if (RegList->getSize() == 1) {
         O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
@@ -221,7 +222,7 @@
       O << IndentStr << IndentStr << "return false;\n";
     } else {
       Action->dump();
-      throw "Unknown CCAction!";
+      PrintFatalError("Unknown CCAction!");
     }
   }
 }

Modified: llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp Tue Nov 13 09:21:47 2012
@@ -79,14 +79,19 @@
   const std::vector<MVT::SimpleValueType> &LegalTypes =
     TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
 
+  if (TP.hasError())
+    return false;
+
   for (unsigned i = 0, e = LegalTypes.size(); i != e; ++i)
     if (Pred == 0 || Pred(LegalTypes[i]))
       TypeVec.push_back(LegalTypes[i]);
 
   // If we have nothing that matches the predicate, bail out.
-  if (TypeVec.empty())
+  if (TypeVec.empty()) {
     TP.error("Type inference contradiction found, no " +
              std::string(PredicateName) + " types found");
+    return false;
+  }
   // No need to sort with one element.
   if (TypeVec.size() == 1) return true;
 
@@ -146,9 +151,9 @@
 
 /// MergeInTypeInfo - This merges in type information from the specified
 /// argument.  If 'this' changes, it returns true.  If the two types are
-/// contradictory (e.g. merge f32 into i32) then this throws an exception.
+/// contradictory (e.g. merge f32 into i32) then this flags an error.
 bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
-  if (InVT.isCompletelyUnknown() || *this == InVT)
+  if (InVT.isCompletelyUnknown() || *this == InVT || TP.hasError())
     return false;
 
   if (isCompletelyUnknown()) {
@@ -224,11 +229,13 @@
   // FIXME: Really want an SMLoc here!
   TP.error("Type inference contradiction found, merging '" +
            InVT.getName() + "' into '" + InputSet.getName() + "'");
-  return true; // unreachable
+  return false;
 }
 
 /// EnforceInteger - Remove all non-integer types from this set.
 bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) {
+  if (TP.hasError())
+    return false;
   // If we know nothing, then get the full set.
   if (TypeVec.empty())
     return FillWithPossibleTypes(TP, isInteger, "integer");
@@ -242,14 +249,18 @@
     if (!isInteger(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
 
-  if (TypeVec.empty())
+  if (TypeVec.empty()) {
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be integer");
+    return false;
+  }
   return true;
 }
 
 /// EnforceFloatingPoint - Remove all integer types from this set.
 bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) {
+  if (TP.hasError())
+    return false;
   // If we know nothing, then get the full set.
   if (TypeVec.empty())
     return FillWithPossibleTypes(TP, isFloatingPoint, "floating point");
@@ -264,14 +275,19 @@
     if (!isFloatingPoint(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
 
-  if (TypeVec.empty())
+  if (TypeVec.empty()) {
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be floating point");
+    return false;
+  }
   return true;
 }
 
 /// EnforceScalar - Remove all vector types from this.
 bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) {
+  if (TP.hasError())
+    return false;
+
   // If we know nothing, then get the full set.
   if (TypeVec.empty())
     return FillWithPossibleTypes(TP, isScalar, "scalar");
@@ -286,14 +302,19 @@
     if (!isScalar(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
 
-  if (TypeVec.empty())
+  if (TypeVec.empty()) {
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be scalar");
+    return false;
+  }
   return true;
 }
 
 /// EnforceVector - Remove all scalar types from this.
 bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
+  if (TP.hasError())
+    return false;
+
   // If we know nothing, then get the full set.
   if (TypeVec.empty())
     return FillWithPossibleTypes(TP, isVector, "vector");
@@ -308,9 +329,11 @@
       MadeChange = true;
     }
 
-  if (TypeVec.empty())
+  if (TypeVec.empty()) {
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be a vector");
+    return false;
+  }
   return MadeChange;
 }
 
@@ -319,6 +342,9 @@
 /// EnforceSmallerThan - 'this' must be a smaller VT than Other.  Update
 /// this an other based on this information.
 bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
+  if (TP.hasError())
+    return false;
+
   // Both operands must be integer or FP, but we don't care which.
   bool MadeChange = false;
 
@@ -365,19 +391,22 @@
     if (hasVectorTypes() && Other.hasVectorTypes()) {
       if (Type.getSizeInBits() >= OtherType.getSizeInBits())
         if (Type.getVectorElementType().getSizeInBits()
-            >= OtherType.getVectorElementType().getSizeInBits())
+            >= OtherType.getVectorElementType().getSizeInBits()) {
           TP.error("Type inference contradiction found, '" +
                    getName() + "' element type not smaller than '" +
                    Other.getName() +"'!");
+          return false;
+        }
     }
     else
       // For scalar types, the bitsize of this type must be larger
       // than that of the other.
-      if (Type.getSizeInBits() >= OtherType.getSizeInBits())
+      if (Type.getSizeInBits() >= OtherType.getSizeInBits()) {
         TP.error("Type inference contradiction found, '" +
                  getName() + "' is not smaller than '" +
                  Other.getName() +"'!");
-
+        return false;
+      }
   }
   
 
@@ -437,9 +466,11 @@
   // If this is the only type in the large set, the constraint can never be
   // satisfied.
   if ((Other.hasIntegerTypes() && OtherIntSize == 0)
-      || (Other.hasFloatingPointTypes() && OtherFPSize == 0))
+      || (Other.hasFloatingPointTypes() && OtherFPSize == 0)) {
     TP.error("Type inference contradiction found, '" +
              Other.getName() + "' has nothing larger than '" + getName() +"'!");
+    return false;
+  }
 
   // Okay, find the largest type in the Other set and remove it from the
   // current set.
@@ -493,9 +524,11 @@
   // If this is the only type in the small set, the constraint can never be
   // satisfied.
   if ((hasIntegerTypes() && IntSize == 0)
-      || (hasFloatingPointTypes() && FPSize == 0))
+      || (hasFloatingPointTypes() && FPSize == 0)) {
     TP.error("Type inference contradiction found, '" +
              getName() + "' has nothing smaller than '" + Other.getName()+"'!");
+    return false;
+  }
 
   return MadeChange;
 }
@@ -504,6 +537,9 @@
 /// whose element is specified by VTOperand.
 bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand,
                                            TreePattern &TP) {
+  if (TP.hasError())
+    return false;
+
   // "This" must be a vector and "VTOperand" must be a scalar.
   bool MadeChange = false;
   MadeChange |= EnforceVector(TP);
@@ -535,9 +571,11 @@
     }
   }
 
-  if (TypeVec.empty())  // FIXME: Really want an SMLoc here!
+  if (TypeVec.empty()) {  // FIXME: Really want an SMLoc here!
     TP.error("Type inference contradiction found, forcing '" +
              InputSet.getName() + "' to have a vector element");
+    return false;
+  }
   return MadeChange;
 }
 
@@ -769,7 +807,7 @@
     ConstraintType = SDTCisVT;
     x.SDTCisVT_Info.VT = getValueType(R->getValueAsDef("VT"));
     if (x.SDTCisVT_Info.VT == MVT::isVoid)
-      throw TGError(R->getLoc(), "Cannot use 'Void' as type to SDTCisVT");
+      PrintFatalError(R->getLoc(), "Cannot use 'Void' as type to SDTCisVT");
 
   } else if (R->isSubClassOf("SDTCisPtrTy")) {
     ConstraintType = SDTCisPtrTy;
@@ -829,11 +867,13 @@
 
 /// ApplyTypeConstraint - Given a node in a pattern, apply this type
 /// constraint to the nodes operands.  This returns true if it makes a
-/// change, false otherwise.  If a type contradiction is found, throw an
-/// exception.
+/// change, false otherwise.  If a type contradiction is found, flag an error.
 bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
                                            const SDNodeInfo &NodeInfo,
                                            TreePattern &TP) const {
+  if (TP.hasError())
+    return false;
+
   unsigned ResNo = 0; // The result number being referenced.
   TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NodeInfo, ResNo);
 
@@ -866,8 +906,10 @@
     if (!NodeToApply->isLeaf() ||
         !isa<DefInit>(NodeToApply->getLeafValue()) ||
         !static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef()
-               ->isSubClassOf("ValueType"))
+               ->isSubClassOf("ValueType")) {
       TP.error(N->getOperator()->getName() + " expects a VT operand!");
+      return false;
+    }
     MVT::SimpleValueType VT =
      getValueType(static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef());
 
@@ -1176,7 +1218,11 @@
 /// fragments, inline them into place, giving us a pattern without any
 /// PatFrag references.
 TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
-  if (isLeaf()) return this;  // nothing to do.
+  if (TP.hasError())
+    return 0;
+
+  if (isLeaf())
+     return this;  // nothing to do.
   Record *Op = getOperator();
 
   if (!Op->isSubClassOf("PatFrag")) {
@@ -1199,9 +1245,11 @@
   TreePattern *Frag = TP.getDAGPatterns().getPatternFragment(Op);
 
   // Verify that we are passing the right number of operands.
-  if (Frag->getNumArgs() != Children.size())
+  if (Frag->getNumArgs() != Children.size()) {
     TP.error("'" + Op->getName() + "' fragment requires " +
              utostr(Frag->getNumArgs()) + " operands!");
+    return 0;
+  }
 
   TreePatternNode *FragTree = Frag->getOnlyTree()->clone();
 
@@ -1375,9 +1423,11 @@
 
 /// ApplyTypeConstraints - Apply all of the type constraints relevant to
 /// this node and its children in the tree.  This returns true if it makes a
-/// change, false otherwise.  If a type contradiction is found, throw an
-/// exception.
+/// change, false otherwise.  If a type contradiction is found, flag an error.
 bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
+  if (TP.hasError())
+    return false;
+
   CodeGenDAGPatterns &CDP = TP.getDAGPatterns();
   if (isLeaf()) {
     if (DefInit *DI = dyn_cast<DefInit>(getLeafValue())) {
@@ -1414,7 +1464,7 @@
 
       TP.error("Integer value '" + itostr(II->getValue()) +
                "' is out of range for type '" + getEnumName(getType(0)) + "'!");
-      return MadeChange;
+      return false;
     }
     return false;
   }
@@ -1477,10 +1527,12 @@
     for (unsigned i = 0, e = NumRetVTs; i != e; ++i)
       MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP);
 
-    if (getNumChildren() != NumParamVTs + 1)
+    if (getNumChildren() != NumParamVTs + 1) {
       TP.error("Intrinsic '" + Int->Name + "' expects " +
                utostr(NumParamVTs) + " operands, not " +
                utostr(getNumChildren() - 1) + " operands!");
+      return false;
+    }
 
     // Apply type info to the intrinsic ID.
     MadeChange |= getChild(0)->UpdateNodeType(0, MVT::iPTR, TP);
@@ -1500,9 +1552,11 @@
 
     // Check that the number of operands is sane.  Negative operands -> varargs.
     if (NI.getNumOperands() >= 0 &&
-        getNumChildren() != (unsigned)NI.getNumOperands())
+        getNumChildren() != (unsigned)NI.getNumOperands()) {
       TP.error(getOperator()->getName() + " node requires exactly " +
                itostr(NI.getNumOperands()) + " operands!");
+      return false;
+    }
 
     bool MadeChange = NI.ApplyTypeConstraints(this, TP);
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
@@ -1576,9 +1630,11 @@
         continue;
 
       // Verify that we didn't run out of provided operands.
-      if (ChildNo >= getNumChildren())
+      if (ChildNo >= getNumChildren()) {
         TP.error("Instruction '" + getOperator()->getName() +
                  "' expects more operands than were provided.");
+        return false;
+      }
 
       MVT::SimpleValueType VT;
       TreePatternNode *Child = getChild(ChildNo++);
@@ -1606,9 +1662,11 @@
       MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters);
     }
 
-    if (ChildNo != getNumChildren())
+    if (ChildNo != getNumChildren()) {
       TP.error("Instruction '" + getOperator()->getName() +
                "' was provided too many operands!");
+      return false;
+    }
 
     return MadeChange;
   }
@@ -1616,9 +1674,11 @@
   assert(getOperator()->isSubClassOf("SDNodeXForm") && "Unknown node type!");
 
   // Node transforms always take one operand.
-  if (getNumChildren() != 1)
+  if (getNumChildren() != 1) {
     TP.error("Node transform '" + getOperator()->getName() +
              "' requires one operand!");
+    return false;
+  }
 
   bool MadeChange = getChild(0)->ApplyTypeConstraints(TP, NotRegisters);
 
@@ -1692,27 +1752,30 @@
 //
 
 TreePattern::TreePattern(Record *TheRec, ListInit *RawPat, bool isInput,
-                         CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp){
-  isInputPattern = isInput;
+                         CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp),
+                         isInputPattern(isInput), HasError(false) {
   for (unsigned i = 0, e = RawPat->getSize(); i != e; ++i)
     Trees.push_back(ParseTreePattern(RawPat->getElement(i), ""));
 }
 
 TreePattern::TreePattern(Record *TheRec, DagInit *Pat, bool isInput,
-                         CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp){
-  isInputPattern = isInput;
+                         CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp),
+                         isInputPattern(isInput), HasError(false) {
   Trees.push_back(ParseTreePattern(Pat, ""));
 }
 
 TreePattern::TreePattern(Record *TheRec, TreePatternNode *Pat, bool isInput,
-                         CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp){
-  isInputPattern = isInput;
+                         CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp),
+                         isInputPattern(isInput), HasError(false) {
   Trees.push_back(Pat);
 }
 
-void TreePattern::error(const std::string &Msg) const {
+void TreePattern::error(const std::string &Msg) {
+  if (HasError)
+    return;
   dump();
-  throw TGError(TheRecord->getLoc(), "In " + TheRecord->getName() + ": " + Msg);
+  PrintError(TheRecord->getLoc(), "In " + TheRecord->getName() + ": " + Msg);
+  HasError = true;
 }
 
 void TreePattern::ComputeNamedNodes() {
@@ -1901,7 +1964,7 @@
 
 /// InferAllTypes - Infer/propagate as many types throughout the expression
 /// patterns as possible.  Return true if all types are inferred, false
-/// otherwise.  Throw an exception if a type contradiction is found.
+/// otherwise.  Flags an error if a type contradiction is found.
 bool TreePattern::
 InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
   if (NamedNodes.empty())
@@ -2152,14 +2215,8 @@
 
     // Infer as many types as possible.  Don't worry about it if we don't infer
     // all of them, some may depend on the inputs of the pattern.
-    try {
-      ThePat->InferAllTypes();
-    } catch (...) {
-      // If this pattern fragment is not supported by this target (no types can
-      // satisfy its constraints), just ignore it.  If the bogus pattern is
-      // actually used by instructions, the type consistency error will be
-      // reported there.
-    }
+    ThePat->InferAllTypes();
+    ThePat->resetError();
 
     // If debugging, print out the pattern fragment result.
     DEBUG(ThePat->dump());
@@ -2199,8 +2256,8 @@
         /* Resolve all types */;
 
       if (TPN->ContainsUnresolvedType()) {
-        throw "Value #" + utostr(i) + " of OperandWithDefaultOps '" +
-          DefaultOps[i]->getName() +"' doesn't have a concrete type!";
+        PrintFatalError("Value #" + utostr(i) + " of OperandWithDefaultOps '" +
+          DefaultOps[i]->getName() +"' doesn't have a concrete type!");
       }
       DefaultOpInfo.DefaultOps.push_back(TPN);
     }
@@ -2746,7 +2803,7 @@
         Instructions.begin(),
        E = Instructions.end(); II != E; ++II) {
     DAGInstruction &TheInst = II->second;
-    const TreePattern *I = TheInst.getPattern();
+    TreePattern *I = TheInst.getPattern();
     if (I == 0) continue;  // No pattern.
 
     // FIXME: Assume only the first tree is the pattern. The others are clobber
@@ -2777,7 +2834,7 @@
 
 static void FindNames(const TreePatternNode *P,
                       std::map<std::string, NameRecord> &Names,
-                      const TreePattern *PatternTop) {
+                      TreePattern *PatternTop) {
   if (!P->getName().empty()) {
     NameRecord &Rec = Names[P->getName()];
     // If this is the first instance of the name, remember the node.
@@ -2794,7 +2851,7 @@
   }
 }
 
-void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern,
+void CodeGenDAGPatterns::AddPatternToMatch(TreePattern *Pattern,
                                            const PatternToMatch &PTM) {
   // Do some sanity checking on the pattern we're about to match.
   std::string Reason;
@@ -2895,7 +2952,7 @@
   }
 
   if (Errors)
-    throw "pattern conflicts";
+    PrintFatalError("pattern conflicts");
 
   // Revisit instructions with undefined flags and no pattern.
   if (Target.guessInstructionProperties()) {
@@ -2992,7 +3049,7 @@
     }
   }
   if (Errors)
-    throw "Errors in DAG patterns";
+    PrintFatalError("Errors in DAG patterns");
 }
 
 /// Given a pattern result with an unresolved type, see if we can find one
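
The CodeGenDAGPatterns changes convert type-inference failures from exceptions into a sticky per-pattern error flag: TreePattern::error() prints only the first diagnostic and sets HasError, each later inference step bails out early through hasError(), and the PatFrag processing loop clears the flag with resetError() because an unsatisfiable fragment only becomes a real error once an instruction pattern uses it. The control-flow shape, reduced to a toy class rather than the real TreePattern:

  #include <cstdio>
  #include <string>

  class InferenceState {
    bool HasError = false;

  public:
    // Report only the first error; later calls in the same pass are ignored.
    void error(const std::string &Msg) {
      if (HasError)
        return;
      std::fprintf(stderr, "error: %s\n", Msg.c_str());
      HasError = true;
    }
    bool hasError() const { return HasError; }
    void resetError() { HasError = false; }

    // Each step checks the flag first and reports "no change made" instead of
    // throwing once a contradiction has already been diagnosed.
    bool enforceInteger(bool TypeIsInteger) {
      if (HasError)
        return false;
      if (!TypeIsInteger) {
        error("Type inference contradiction found: an integer type is required");
        return false;
      }
      return true;
    }
  };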

Modified: llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h Tue Nov 13 09:21:47 2012
@@ -105,7 +105,7 @@
 
     /// MergeInTypeInfo - This merges in type information from the specified
     /// argument.  If 'this' changes, it returns true.  If the two types are
-    /// contradictory (e.g. merge f32 into i32) then this throws an exception.
+    /// contradictory (e.g. merge f32 into i32) then this flags an error.
     bool MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP);
 
     bool MergeInTypeInfo(MVT::SimpleValueType InVT, TreePattern &TP) {
@@ -187,8 +187,8 @@
 
   /// ApplyTypeConstraint - Given a node in a pattern, apply this type
   /// constraint to the nodes operands.  This returns true if it makes a
-  /// change, false otherwise.  If a type contradiction is found, throw an
-  /// exception.
+  /// change, false otherwise.  If a type contradiction is found, an error
+  /// is flagged.
   bool ApplyTypeConstraint(TreePatternNode *N, const SDNodeInfo &NodeInfo,
                            TreePattern &TP) const;
 };
@@ -232,7 +232,7 @@
   /// ApplyTypeConstraints - Given a node in a pattern, apply the type
   /// constraints for this node to the operands of the node.  This returns
   /// true if it makes a change, false otherwise.  If a type contradiction is
-  /// found, throw an exception.
+  /// found, an error is flagged.
   bool ApplyTypeConstraints(TreePatternNode *N, TreePattern &TP) const {
     bool MadeChange = false;
     for (unsigned i = 0, e = TypeConstraints.size(); i != e; ++i)
@@ -446,13 +446,12 @@
 
   /// ApplyTypeConstraints - Apply all of the type constraints relevant to
   /// this node and its children in the tree.  This returns true if it makes a
-  /// change, false otherwise.  If a type contradiction is found, throw an
-  /// exception.
+  /// change, false otherwise.  If a type contradiction is found, flag an error.
   bool ApplyTypeConstraints(TreePattern &TP, bool NotRegisters);
 
   /// UpdateNodeType - Set the node type of N to VT if VT contains
-  /// information.  If N already contains a conflicting type, then throw an
-  /// exception.  This returns true if any information was updated.
+  /// information.  If N already contains a conflicting type, then flag an
+  /// error.  This returns true if any information was updated.
   ///
   bool UpdateNodeType(unsigned ResNo, const EEVT::TypeSet &InTy,
                       TreePattern &TP) {
@@ -514,6 +513,10 @@
   /// isInputPattern - True if this is an input pattern, something to match.
   /// False if this is an output pattern, something to emit.
   bool isInputPattern;
+
+  /// HasError - True if the currently processed nodes have unresolvable types
+  /// or other non-fatal errors.
+  bool HasError;
 public:
 
   /// TreePattern constructor - Parse the specified DagInits into the
@@ -565,13 +568,19 @@
 
   /// InferAllTypes - Infer/propagate as many types throughout the expression
   /// patterns as possible.  Return true if all types are inferred, false
-  /// otherwise.  Throw an exception if a type contradiction is found.
+  /// otherwise.  Bail out if a type contradiction is found.
   bool InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> >
                           *NamedTypes=0);
 
-  /// error - Throw an exception, prefixing it with information about this
-  /// pattern.
-  void error(const std::string &Msg) const;
+  /// error - If this is the first error in the current resolution step,
+  /// print it and set the error flag.  Otherwise, continue silently.
+  void error(const std::string &Msg);
+  bool hasError() const {
+    return HasError;
+  }
+  void resetError() {
+    HasError = false;
+  }
 
   void print(raw_ostream &OS) const;
   void dump() const;
@@ -602,7 +611,7 @@
     : Pattern(TP), Results(results), Operands(operands),
       ImpResults(impresults), ResultPattern(0) {}
 
-  const TreePattern *getPattern() const { return Pattern; }
+  TreePattern *getPattern() const { return Pattern; }
   unsigned getNumResults() const { return Results.size(); }
   unsigned getNumOperands() const { return Operands.size(); }
   unsigned getNumImpResults() const { return ImpResults.size(); }
@@ -794,7 +803,7 @@
   void GenerateVariants();
   void VerifyInstructionFlags();
 
-  void AddPatternToMatch(const TreePattern *Pattern, const PatternToMatch &PTM);
+  void AddPatternToMatch(TreePattern *Pattern, const PatternToMatch &PTM);
   void FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
                                    std::map<std::string,
                                    TreePatternNode*> &InstInputs,
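With exceptions gone, TreePattern records type contradictions in the HasError flag declared above: error() prints the first message of a resolution step and sets the flag, hasError() exposes it, and resetError() clears it between steps (CodeGenDAGPatterns.cpp pairs InferAllTypes() with resetError() this way when parsing pattern fragments). A minimal sketch of the intended calling pattern, with a hypothetical fragment pointer Frag standing in for the real callers:

    // Hypothetical caller; mirrors the InferAllTypes()/resetError() pairing
    // used for pattern fragments the current target may not be able to type.
    Frag->InferAllTypes();
    if (Frag->hasError()) {
      // The fragment is unsupported on this target; ignore it here.  If an
      // instruction actually uses it, the contradiction is reported when that
      // instruction's pattern is checked.
    }
    Frag->resetError();  // Clear the flag so it does not leak into the next step.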

Modified: llvm/branches/R600/utils/TableGen/CodeGenInstruction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenInstruction.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenInstruction.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenInstruction.cpp Tue Nov 13 09:21:47 2012
@@ -34,18 +34,18 @@
 
   if (DefInit *Init = dyn_cast<DefInit>(OutDI->getOperator())) {
     if (Init->getDef()->getName() != "outs")
-      throw R->getName() + ": invalid def name for output list: use 'outs'";
+      PrintFatalError(R->getName() + ": invalid def name for output list: use 'outs'");
   } else
-    throw R->getName() + ": invalid output list: use 'outs'";
+    PrintFatalError(R->getName() + ": invalid output list: use 'outs'");
 
   NumDefs = OutDI->getNumArgs();
 
   DagInit *InDI = R->getValueAsDag("InOperandList");
   if (DefInit *Init = dyn_cast<DefInit>(InDI->getOperator())) {
     if (Init->getDef()->getName() != "ins")
-      throw R->getName() + ": invalid def name for input list: use 'ins'";
+      PrintFatalError(R->getName() + ": invalid def name for input list: use 'ins'");
   } else
-    throw R->getName() + ": invalid input list: use 'ins'";
+    PrintFatalError(R->getName() + ": invalid input list: use 'ins'");
 
   unsigned MIOperandNo = 0;
   std::set<std::string> OperandNames;
@@ -62,7 +62,7 @@
 
     DefInit *Arg = dyn_cast<DefInit>(ArgInit);
     if (!Arg)
-      throw "Illegal operand for the '" + R->getName() + "' instruction!";
+      PrintFatalError("Illegal operand for the '" + R->getName() + "' instruction!");
 
     Record *Rec = Arg->getDef();
     std::string PrintMethod = "printOperand";
@@ -82,8 +82,8 @@
       // Verify that MIOpInfo has an 'ops' root value.
       if (!isa<DefInit>(MIOpInfo->getOperator()) ||
           cast<DefInit>(MIOpInfo->getOperator())->getDef()->getName() != "ops")
-        throw "Bad value for MIOperandInfo in operand '" + Rec->getName() +
-        "'\n";
+        PrintFatalError("Bad value for MIOperandInfo in operand '" + Rec->getName() +
+          "'\n");
 
       // If we have MIOpInfo, then we have #operands equal to number of entries
       // in MIOperandInfo.
@@ -101,16 +101,16 @@
       OperandType = "OPERAND_REGISTER";
     } else if (!Rec->isSubClassOf("PointerLikeRegClass") &&
                !Rec->isSubClassOf("unknown_class"))
-      throw "Unknown operand class '" + Rec->getName() +
-      "' in '" + R->getName() + "' instruction!";
+      PrintFatalError("Unknown operand class '" + Rec->getName() +
+        "' in '" + R->getName() + "' instruction!");
 
     // Check that the operand has a name and that it's unique.
     if (ArgName.empty())
-      throw "In instruction '" + R->getName() + "', operand #" + utostr(i) +
-      " has no name!";
+      PrintFatalError("In instruction '" + R->getName() + "', operand #" + utostr(i) +
+        " has no name!");
     if (!OperandNames.insert(ArgName).second)
-      throw "In instruction '" + R->getName() + "', operand #" + utostr(i) +
-      " has the same name as a previous operand!";
+      PrintFatalError("In instruction '" + R->getName() + "', operand #" + utostr(i) +
+        " has the same name as a previous operand!");
 
     OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod, EncoderMethod,
                                       OperandType, MIOperandNo, NumOps,
@@ -128,13 +128,13 @@
 
 /// getOperandNamed - Return the index of the operand with the specified
 /// non-empty name.  If the instruction does not have an operand with the
-/// specified name, throw an exception.
+/// specified name, abort.
 ///
 unsigned CGIOperandList::getOperandNamed(StringRef Name) const {
   unsigned OpIdx;
   if (hasOperandNamed(Name, OpIdx)) return OpIdx;
-  throw "'" + TheDef->getName() + "' does not have an operand named '$" +
-    Name.str() + "'!";
+  PrintFatalError("'" + TheDef->getName() + "' does not have an operand named '$" +
+    Name.str() + "'!");
 }
 
 /// hasOperandNamed - Query whether the instruction has an operand of the
@@ -153,7 +153,7 @@
 std::pair<unsigned,unsigned>
 CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
   if (Op.empty() || Op[0] != '$')
-    throw TheDef->getName() + ": Illegal operand name: '" + Op + "'";
+    PrintFatalError(TheDef->getName() + ": Illegal operand name: '" + Op + "'");
 
   std::string OpName = Op.substr(1);
   std::string SubOpName;
@@ -163,7 +163,7 @@
   if (DotIdx != std::string::npos) {
     SubOpName = OpName.substr(DotIdx+1);
     if (SubOpName.empty())
-      throw TheDef->getName() + ": illegal empty suboperand name in '" +Op +"'";
+      PrintFatalError(TheDef->getName() + ": illegal empty suboperand name in '" +Op +"'");
     OpName = OpName.substr(0, DotIdx);
   }
 
@@ -173,8 +173,8 @@
     // If one was needed, throw.
     if (OperandList[OpIdx].MINumOperands > 1 && !AllowWholeOp &&
         SubOpName.empty())
-      throw TheDef->getName() + ": Illegal to refer to"
-      " whole operand part of complex operand '" + Op + "'";
+      PrintFatalError(TheDef->getName() + ": Illegal to refer to"
+        " whole operand part of complex operand '" + Op + "'");
 
     // Otherwise, return the operand.
     return std::make_pair(OpIdx, 0U);
@@ -183,7 +183,7 @@
   // Find the suboperand number involved.
   DagInit *MIOpInfo = OperandList[OpIdx].MIOperandInfo;
   if (MIOpInfo == 0)
-    throw TheDef->getName() + ": unknown suboperand name in '" + Op + "'";
+    PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'");
 
   // Find the operand with the right name.
   for (unsigned i = 0, e = MIOpInfo->getNumArgs(); i != e; ++i)
@@ -191,7 +191,7 @@
       return std::make_pair(OpIdx, i);
 
   // Otherwise, didn't find it!
-  throw TheDef->getName() + ": unknown suboperand name in '" + Op + "'";
+  PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'");
 }
 
 static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
@@ -203,13 +203,13 @@
     std::string Name = CStr.substr(wpos+1);
     wpos = Name.find_first_not_of(" \t");
     if (wpos == std::string::npos)
-      throw "Illegal format for @earlyclobber constraint: '" + CStr + "'";
+      PrintFatalError("Illegal format for @earlyclobber constraint: '" + CStr + "'");
     Name = Name.substr(wpos);
     std::pair<unsigned,unsigned> Op = Ops.ParseOperandName(Name, false);
 
     // Build the string for the operand
     if (!Ops[Op.first].Constraints[Op.second].isNone())
-      throw "Operand '" + Name + "' cannot have multiple constraints!";
+      PrintFatalError("Operand '" + Name + "' cannot have multiple constraints!");
     Ops[Op.first].Constraints[Op.second] =
     CGIOperandList::ConstraintInfo::getEarlyClobber();
     return;
@@ -224,14 +224,14 @@
   // TIED_TO: $src1 = $dst
   wpos = Name.find_first_of(" \t");
   if (wpos == std::string::npos)
-    throw "Illegal format for tied-to constraint: '" + CStr + "'";
+    PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'");
   std::string DestOpName = Name.substr(0, wpos);
   std::pair<unsigned,unsigned> DestOp = Ops.ParseOperandName(DestOpName, false);
 
   Name = CStr.substr(pos+1);
   wpos = Name.find_first_not_of(" \t");
   if (wpos == std::string::npos)
-    throw "Illegal format for tied-to constraint: '" + CStr + "'";
+    PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'");
 
   std::string SrcOpName = Name.substr(wpos);
   std::pair<unsigned,unsigned> SrcOp = Ops.ParseOperandName(SrcOpName, false);
@@ -243,7 +243,8 @@
   unsigned FlatOpNo = Ops.getFlattenedOperandNumber(SrcOp);
 
   if (!Ops[DestOp.first].Constraints[DestOp.second].isNone())
-    throw "Operand '" + DestOpName + "' cannot have multiple constraints!";
+    PrintFatalError("Operand '" + DestOpName +
+      "' cannot have multiple constraints!");
   Ops[DestOp.first].Constraints[DestOp.second] =
     CGIOperandList::ConstraintInfo::getTied(FlatOpNo);
 }
@@ -328,7 +329,7 @@
   ImplicitUses = R->getValueAsListOfDefs("Uses");
 
   if (neverHasSideEffects + hasSideEffects > 1)
-    throw R->getName() + ": multiple conflicting side-effect flags set!";
+    PrintFatalError(R->getName() + ": multiple conflicting side-effect flags set!");
 
   // Parse Constraints.
   ParseConstraints(R->getValueAsString("Constraints"), Operands);
@@ -422,7 +423,7 @@
     // If the operand is a record, it must have a name, and the record type
     // must match up with the instruction's argument type.
     if (Result->getArgName(AliasOpNo).empty())
-      throw TGError(Loc, "result argument #" + utostr(AliasOpNo) +
+      PrintFatalError(Loc, "result argument #" + utostr(AliasOpNo) +
                     " must have a name!");
     ResOp = ResultOperand(Result->getArgName(AliasOpNo), ADI->getDef());
     return true;
@@ -457,13 +458,13 @@
 
     if (!T.getRegisterClass(InstOpRec)
         .contains(T.getRegBank().getReg(ADI->getDef())))
-      throw TGError(Loc, "fixed register " + ADI->getDef()->getName() +
-                    " is not a member of the " + InstOpRec->getName() +
-                    " register class!");
+      PrintFatalError(Loc, "fixed register " + ADI->getDef()->getName() +
+                      " is not a member of the " + InstOpRec->getName() +
+                      " register class!");
 
     if (!Result->getArgName(AliasOpNo).empty())
-      throw TGError(Loc, "result fixed register argument must "
-                    "not have a name!");
+      PrintFatalError(Loc, "result fixed register argument must "
+                      "not have a name!");
 
     ResOp = ResultOperand(ADI->getDef());
     return true;
@@ -491,8 +492,8 @@
       return false;
     // Integer arguments can't have names.
     if (!Result->getArgName(AliasOpNo).empty())
-      throw TGError(Loc, "result argument #" + utostr(AliasOpNo) +
-                    " must not have a name!");
+      PrintFatalError(Loc, "result argument #" + utostr(AliasOpNo) +
+                      " must not have a name!");
     ResOp = ResultOperand(II->getValue());
     return true;
   }
@@ -520,7 +521,8 @@
   // Verify that the root of the result is an instruction.
   DefInit *DI = dyn_cast<DefInit>(Result->getOperator());
   if (DI == 0 || !DI->getDef()->isSubClassOf("Instruction"))
-    throw TGError(R->getLoc(), "result of inst alias should be an instruction");
+    PrintFatalError(R->getLoc(),
+                    "result of inst alias should be an instruction");
 
   ResultInst = &T.getInstruction(DI->getDef());
 
@@ -536,9 +538,9 @@
     // same type.
     Record *&Entry = NameClass[Result->getArgName(i)];
     if (Entry && Entry != ADI->getDef())
-      throw TGError(R->getLoc(), "result value $" + Result->getArgName(i) +
-                    " is both " + Entry->getName() + " and " +
-                    ADI->getDef()->getName() + "!");
+      PrintFatalError(R->getLoc(), "result value $" + Result->getArgName(i) +
+                      " is both " + Entry->getName() + " and " +
+                      ADI->getDef()->getName() + "!");
     Entry = ADI->getDef();
   }
 
@@ -554,7 +556,7 @@
       continue;
 
     if (AliasOpNo >= Result->getNumArgs())
-      throw TGError(R->getLoc(), "not enough arguments for instruction!");
+      PrintFatalError(R->getLoc(), "not enough arguments for instruction!");
 
     Record *InstOpRec = ResultInst->Operands[i].Rec;
     unsigned NumSubOps = ResultInst->Operands[i].MINumOperands;
@@ -595,7 +597,7 @@
       DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo;
       for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
         if (AliasOpNo >= Result->getNumArgs())
-          throw TGError(R->getLoc(), "not enough arguments for instruction!");
+          PrintFatalError(R->getLoc(), "not enough arguments for instruction!");
         Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef();
         if (tryAliasOpMatch(Result, AliasOpNo, SubRec, false,
                             R->getLoc(), T, ResOp)) {
@@ -603,18 +605,18 @@
           ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
           ++AliasOpNo;
         } else {
-          throw TGError(R->getLoc(), "result argument #" + utostr(AliasOpNo) +
+          PrintFatalError(R->getLoc(), "result argument #" + utostr(AliasOpNo) +
                         " does not match instruction operand class " +
                         (SubOp == 0 ? InstOpRec->getName() :SubRec->getName()));
         }
       }
       continue;
     }
-    throw TGError(R->getLoc(), "result argument #" + utostr(AliasOpNo) +
-                  " does not match instruction operand class " +
-                  InstOpRec->getName());
+    PrintFatalError(R->getLoc(), "result argument #" + utostr(AliasOpNo) +
+                    " does not match instruction operand class " +
+                    InstOpRec->getName());
   }
 
   if (AliasOpNo != Result->getNumArgs())
-    throw TGError(R->getLoc(), "too many operands for instruction!");
+    PrintFatalError(R->getLoc(), "too many operands for instruction!");
 }

Modified: llvm/branches/R600/utils/TableGen/CodeGenInstruction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenInstruction.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenInstruction.h (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenInstruction.h Tue Nov 13 09:21:47 2012
@@ -152,7 +152,7 @@
 
     /// getOperandNamed - Return the index of the operand with the specified
     /// non-empty name.  If the instruction does not have an operand with the
-    /// specified name, throw an exception.
+    /// specified name, abort.
     unsigned getOperandNamed(StringRef Name) const;
 
     /// hasOperandNamed - Query whether the instruction has an operand of the
@@ -162,9 +162,8 @@
 
     /// ParseOperandName - Parse an operand name like "$foo" or "$foo.bar",
     /// where $foo is a whole operand and $foo.bar refers to a suboperand.
-    /// This throws an exception if the name is invalid.  If AllowWholeOp is
-    /// true, references to operands with suboperands are allowed, otherwise
-    /// not.
+    /// This aborts if the name is invalid.  If AllowWholeOp is true, references
+    /// to operands with suboperands are allowed, otherwise not.
     std::pair<unsigned,unsigned> ParseOperandName(const std::string &Op,
                                                   bool AllowWholeOp = true);
 

Added: llvm/branches/R600/utils/TableGen/CodeGenMapTable.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenMapTable.cpp?rev=167838&view=auto
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenMapTable.cpp (added)
+++ llvm/branches/R600/utils/TableGen/CodeGenMapTable.cpp Tue Nov 13 09:21:47 2012
@@ -0,0 +1,606 @@
+//===- CodeGenMapTable.cpp - Instruction Mapping Table Generator ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// CodeGenMapTable provides functionality for TableGen to create
+// relation mappings between instructions. Relation models are defined using
+// InstrMapping as a base class. This file implements the functionality which
+// parses these definitions and generates relation maps using the information
+// specified there. These maps are emitted as tables in the XXXGenInstrInfo.inc
+// file along with the functions to query them.
+//
+// A relationship model to relate non-predicate instructions with their
+// predicated true/false forms can be defined as follows:
+//
+// def getPredOpcode : InstrMapping {
+//  let FilterClass = "PredRel";
+//  let RowFields = ["BaseOpcode"];
+//  let ColFields = ["PredSense"];
+//  let KeyCol = ["none"];
+//  let ValueCols = [["true"], ["false"]]; }
+//
+// CodeGenMapTable parses this map and generates a table in XXXGenInstrInfo.inc
+// file that contains the instructions modeling this relationship. This table
+// is defined in the function
+// "int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense)"
+// that can be used to retrieve the predicated form of the instruction by
+// passing its opcode value and the predicate sense (true/false) of the desired
+// instruction as arguments.
+//
+// Short description of the algorithm:
+//
+// 1) Iterate through all the records that derive from "InstrMapping" class.
+// 2) For each record, filter out instructions based on the FilterClass value.
+// 3) Iterate through this set of instructions and insert them into
+// RowInstrMap map based on their RowFields values. RowInstrMap is keyed by the
+// vector of RowFields values and contains vectors of Records (instructions) as
+// values. RowFields is a list of fields that are required to have the same
+// values for all the instructions appearing in the same row of the relation
+// table. All the instructions in a given row of the relation table have some
+// sort of relationship with the key instruction defined by the corresponding
+// relationship model.
+//
+// Ex: RowInstrMap(RowVal1, RowVal2, ...) -> [Instr1, Instr2, Instr3, ... ]
+// Here Instr1, Instr2, Instr3 have same values (RowVal1, RowVal2) for
+// RowFields. These groups of instructions are later matched against ValueCols
+// to determine the column they belong to, if any.
+//
+// While building the RowInstrMap map, collect all the key instructions in
+// KeyInstrVec. These are the instructions having the same values as KeyCol
+// for all the fields listed in ColFields.
+//
+// For Example:
+//
+// Relate non-predicate instructions with their predicated true/false forms.
+//
+// def getPredOpcode : InstrMapping {
+//  let FilterClass = "PredRel";
+//  let RowFields = ["BaseOpcode"];
+//  let ColFields = ["PredSense"];
+//  let KeyCol = ["none"];
+//  let ValueCols = [["true"], ["false"]]; }
+//
+// Here, only instructions that have "none" as PredSense will be selected as key
+// instructions.
+//
+// 4) For each key instruction, get the group of instructions that share the
+// same key-value as the key instruction from RowInstrMap. Iterate over the list
+// of columns in ValueCols (it is defined as a list<list<string> >. Therefore,
+// it can specify multi-column relationships). For each column, find the
+// instruction from the group that matches all the values for the column.
+// Multiple matches are not allowed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenTarget.h"
+#include "llvm/Support/Format.h"
+#include "llvm/TableGen/Error.h"
+using namespace llvm;
+typedef std::map<std::string, std::vector<Record*> > InstrRelMapTy;
+
+typedef std::map<std::vector<Init*>, std::vector<Record*> > RowInstrMapTy;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// This class is used to represent InstrMapping class defined in Target.td file.
+class InstrMap {
+private:
+  std::string Name;
+  std::string FilterClass;
+  ListInit *RowFields;
+  ListInit *ColFields;
+  ListInit *KeyCol;
+  std::vector<ListInit*> ValueCols;
+
+public:
+  InstrMap(Record* MapRec) {
+    Name = MapRec->getName();
+
+    // FilterClass - It's used to reduce the search space only to the
+    // instructions that define the kind of relationship modeled by
+    // this InstrMapping object/record.
+    const RecordVal *Filter = MapRec->getValue("FilterClass");
+    FilterClass = Filter->getValue()->getAsUnquotedString();
+
+    // List of fields/attributes that need to be the same across all the
+    // instructions in a row of the relation table.
+    RowFields = MapRec->getValueAsListInit("RowFields");
+
+    // List of fields/attributes that are constant across all the instructions
+    // in a column of the relation table. Ex: ColFields = 'predSense'
+    ColFields = MapRec->getValueAsListInit("ColFields");
+
+    // Values for the fields/attributes listed in 'ColFields'.
+    // Ex: KeyCol = 'noPred' -- key instruction is non predicated
+    KeyCol = MapRec->getValueAsListInit("KeyCol");
+
+    // List of values for the fields/attributes listed in 'ColFields', one for
+    // each column in the relation table.
+    //
+    // Ex: ValueCols = [['true'],['false']] -- it results in two columns in
+    // the table. The first column requires all the instructions to have
+    // predSense set to 'true' and the second column requires it to be 'false'.
+    ListInit *ColValList = MapRec->getValueAsListInit("ValueCols");
+
+    // Each instruction map must specify at least one column for it to be valid.
+    if (ColValList->getSize() == 0)
+      PrintFatalError(MapRec->getLoc(), "InstrMapping record `" +
+        MapRec->getName() + "' has empty " + "`ValueCols' field!");
+
+    for (unsigned i = 0, e = ColValList->getSize(); i < e; i++) {
+      ListInit *ColI = dyn_cast<ListInit>(ColValList->getElement(i));
+
+      // Make sure that all the sub-lists in 'ValueCols' have the same number
+      // of elements as the fields in 'ColFields'.
+      if (ColI->getSize() != ColFields->getSize())
+        PrintFatalError(MapRec->getLoc(), "Record `" + MapRec->getName() +
+          "', field `ValueCols' entries don't match with " +
+          "the entries in 'ColFields'!");
+      ValueCols.push_back(ColI);
+    }
+  }
+
+  std::string getName() const {
+    return Name;
+  }
+
+  std::string getFilterClass() {
+    return FilterClass;
+  }
+
+  ListInit *getRowFields() const {
+    return RowFields;
+  }
+
+  ListInit *getColFields() const {
+    return ColFields;
+  }
+
+  ListInit *getKeyCol() const {
+    return KeyCol;
+  }
+
+  const std::vector<ListInit*> &getValueCols() const {
+    return ValueCols;
+  }
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// class MapTableEmitter : It builds the instruction relation maps using
+// the information provided in InstrMapping records. It outputs these
+// relationship maps as tables into XXXGenInstrInfo.inc file along with the
+// functions to query them.
+
+namespace {
+class MapTableEmitter {
+private:
+//  std::string TargetName;
+  const CodeGenTarget &Target;
+  // InstrMapDesc - InstrMapping record to be processed.
+  InstrMap InstrMapDesc;
+
+  // InstrDefs - list of instructions filtered using FilterClass defined
+  // in InstrMapDesc.
+  std::vector<Record*> InstrDefs;
+
+  // RowInstrMap - maps RowFields values to the instructions. It's keyed by the
+  // values of the row fields and contains vectors of records as values.
+  RowInstrMapTy RowInstrMap;
+
+  // KeyInstrVec - list of key instructions.
+  std::vector<Record*> KeyInstrVec;
+  DenseMap<Record*, std::vector<Record*> > MapTable;
+
+public:
+  MapTableEmitter(CodeGenTarget &Target, RecordKeeper &Records, Record *IMRec):
+                  Target(Target), InstrMapDesc(IMRec) {
+    const std::string FilterClass = InstrMapDesc.getFilterClass();
+    InstrDefs = Records.getAllDerivedDefinitions(FilterClass);
+  }
+
+  void buildRowInstrMap();
+
+  // Returns true if an instruction is a key instruction, i.e., its ColFields
+  // have same values as KeyCol.
+  bool isKeyColInstr(Record* CurInstr);
+
+  // Find column instruction corresponding to a key instruction based on the
+  // constraints for that column.
+  Record *getInstrForColumn(Record *KeyInstr, ListInit *CurValueCol);
+
+  // Find column instructions for each key instruction based
+  // on ValueCols and store them into MapTable.
+  void buildMapTable();
+
+  void emitBinSearch(raw_ostream &OS, unsigned TableSize);
+  void emitTablesWithFunc(raw_ostream &OS);
+  unsigned emitBinSearchTable(raw_ostream &OS);
+
+  // Lookup functions to query binary search tables.
+  void emitMapFuncBody(raw_ostream &OS, unsigned TableSize);
+
+};
+} // End anonymous namespace.
+
+
+//===----------------------------------------------------------------------===//
+// Process all the instructions that model this relation (already present in
+// InstrDefs) and insert them into RowInstrMap which is keyed by the values of
+// the fields listed as RowFields. It stores vectors of records as values.
+// All the related instructions have the same values for the RowFields and are
+// thus part of the same key-value pair.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::buildRowInstrMap() {
+  for (unsigned i = 0, e = InstrDefs.size(); i < e; i++) {
+    std::vector<Record*> InstrList;
+    Record *CurInstr = InstrDefs[i];
+    std::vector<Init*> KeyValue;
+    ListInit *RowFields = InstrMapDesc.getRowFields();
+    for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) {
+      Init *RowFieldsJ = RowFields->getElement(j);
+      Init *CurInstrVal = CurInstr->getValue(RowFieldsJ)->getValue();
+      KeyValue.push_back(CurInstrVal);
+    }
+
+    // Collect key instructions into KeyInstrVec. Later, these instructions are
+    // processed to assign column position to the instructions sharing
+    // their KeyValue in RowInstrMap.
+    if (isKeyColInstr(CurInstr))
+      KeyInstrVec.push_back(CurInstr);
+
+    RowInstrMap[KeyValue].push_back(CurInstr);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Return true if an instruction is a KeyCol instruction.
+//===----------------------------------------------------------------------===//
+
+bool MapTableEmitter::isKeyColInstr(Record* CurInstr) {
+  ListInit *ColFields = InstrMapDesc.getColFields();
+  ListInit *KeyCol = InstrMapDesc.getKeyCol();
+
+  // Check if the instruction is a KeyCol instruction.
+  bool MatchFound = true;
+  for (unsigned j = 0, endCF = ColFields->getSize();
+      (j < endCF) && MatchFound; j++) {
+    RecordVal *ColFieldName = CurInstr->getValue(ColFields->getElement(j));
+    std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString();
+    std::string KeyColValue = KeyCol->getElement(j)->getAsUnquotedString();
+    MatchFound = (CurInstrVal == KeyColValue);
+  }
+  return MatchFound;
+}
+
+//===----------------------------------------------------------------------===//
+// Build a map to link key instructions with the column instructions arranged
+// according to their column positions.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::buildMapTable() {
+  // Find column instructions for a given key based on the ColField
+  // constraints.
+  const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+  unsigned NumOfCols = ValueCols.size();
+  for (unsigned j = 0, endKI = KeyInstrVec.size(); j < endKI; j++) {
+    Record *CurKeyInstr = KeyInstrVec[j];
+    std::vector<Record*> ColInstrVec(NumOfCols);
+
+    // Find the column instruction based on the constraints for the column.
+    for (unsigned ColIdx = 0; ColIdx < NumOfCols; ColIdx++) {
+      ListInit *CurValueCol = ValueCols[ColIdx];
+      Record *ColInstr = getInstrForColumn(CurKeyInstr, CurValueCol);
+      ColInstrVec[ColIdx] = ColInstr;
+    }
+    MapTable[CurKeyInstr] = ColInstrVec;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Find column instruction based on the constraints for that column.
+//===----------------------------------------------------------------------===//
+
+Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
+                                           ListInit *CurValueCol) {
+  ListInit *RowFields = InstrMapDesc.getRowFields();
+  std::vector<Init*> KeyValue;
+
+  // Construct KeyValue using KeyInstr's values for RowFields.
+  for (unsigned j = 0, endRF = RowFields->getSize(); j < endRF; j++) {
+    Init *RowFieldsJ = RowFields->getElement(j);
+    Init *KeyInstrVal = KeyInstr->getValue(RowFieldsJ)->getValue();
+    KeyValue.push_back(KeyInstrVal);
+  }
+
+  // Get all the instructions that share the same KeyValue as the KeyInstr
+  // in RowInstrMap. We search through these instructions to find a match
+  // for the current column, i.e., the instruction which has the same values
+  // as CurValueCol for all the fields in ColFields.
+  const std::vector<Record*> &RelatedInstrVec = RowInstrMap[KeyValue];
+
+  ListInit *ColFields = InstrMapDesc.getColFields();
+  Record *MatchInstr = NULL;
+
+  for (unsigned i = 0, e = RelatedInstrVec.size(); i < e; i++) {
+    bool MatchFound = true;
+    Record *CurInstr = RelatedInstrVec[i];
+    for (unsigned j = 0, endCF = ColFields->getSize();
+        (j < endCF) && MatchFound; j++) {
+      Init *ColFieldJ = ColFields->getElement(j);
+      Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue();
+      std::string CurInstrVal = CurInstrInit->getAsUnquotedString();
+      Init *ColFieldJValue = CurValueCol->getElement(j);
+      MatchFound = (CurInstrVal == ColFieldJValue->getAsUnquotedString());
+    }
+
+    if (MatchFound) {
+      if (MatchInstr) // Already had a match
+        // Error if multiple matches are found for a column.
+        PrintFatalError("Multiple matches found for `" + KeyInstr->getName() +
+              "', for the relation `" + InstrMapDesc.getName());
+      MatchInstr = CurInstr;
+    }
+  }
+  return MatchInstr;
+}
+
+//===----------------------------------------------------------------------===//
+// Emit one table per relation. Only instructions with a valid relation of a
+// given type are included in the table sorted by their enum values (opcodes).
+// Binary search is used for locating instructions in the table.
+//===----------------------------------------------------------------------===//
+
+unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) {
+
+  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+                                            Target.getInstructionsByEnumValue();
+  std::string TargetName = Target.getName();
+  const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+  unsigned NumCol = ValueCols.size();
+  unsigned TotalNumInstr = NumberedInstructions.size();
+  unsigned TableSize = 0;
+
+  OS << "static const uint16_t "<<InstrMapDesc.getName();
+  // The number of columns in the table is NumCol+1 because key instructions
+  // are emitted as the first column.
+  OS << "Table[]["<< NumCol+1 << "] = {\n";
+  for (unsigned i = 0; i < TotalNumInstr; i++) {
+    Record *CurInstr = NumberedInstructions[i]->TheDef;
+    std::vector<Record*> ColInstrs = MapTable[CurInstr];
+    std::string OutStr("");
+    unsigned RelExists = 0;
+    if (ColInstrs.size()) {
+      for (unsigned j = 0; j < NumCol; j++) {
+        if (ColInstrs[j] != NULL) {
+          RelExists = 1;
+          OutStr += ", ";
+          OutStr += TargetName;
+          OutStr += "::";
+          OutStr += ColInstrs[j]->getName();
+        } else { OutStr += ", -1";}
+      }
+
+      if (RelExists) {
+        OS << "  { " << TargetName << "::" << CurInstr->getName();
+        OS << OutStr <<" },\n";
+        TableSize++;
+      }
+    }
+  }
+  if (!TableSize) {
+    OS << "  { " << TargetName << "::" << "INSTRUCTION_LIST_END, ";
+    OS << TargetName << "::" << "INSTRUCTION_LIST_END }";
+  }
+  OS << "}; // End of " << InstrMapDesc.getName() << "Table\n\n";
+  return TableSize;
+}
+
+//===----------------------------------------------------------------------===//
+// Emit binary search algorithm as part of the functions used to query
+// relation tables.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::emitBinSearch(raw_ostream &OS, unsigned TableSize) {
+  OS << "  unsigned mid;\n";
+  OS << "  unsigned start = 0;\n";
+  OS << "  unsigned end = " << TableSize << ";\n";
+  OS << "  while (start < end) {\n";
+  OS << "    mid = start + (end - start)/2;\n";
+  OS << "    if (Opcode == " << InstrMapDesc.getName() << "Table[mid][0]) {\n";
+  OS << "      break;\n";
+  OS << "    }\n";
+  OS << "    if (Opcode < " << InstrMapDesc.getName() << "Table[mid][0])\n";
+  OS << "      end = mid;\n";
+  OS << "    else\n";
+  OS << "      start = mid + 1;\n";
+  OS << "  }\n";
+  OS << "  if (start == end)\n";
+  OS << "    return -1; // Instruction doesn't exist in this table.\n\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Emit functions to query relation tables.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::emitMapFuncBody(raw_ostream &OS,
+                                           unsigned TableSize) {
+
+  ListInit *ColFields = InstrMapDesc.getColFields();
+  const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+
+  // Emit binary search algorithm to locate instructions in the
+  // relation table. If found, return opcode value from the appropriate column
+  // of the table.
+  emitBinSearch(OS, TableSize);
+
+  if (ValueCols.size() > 1) {
+    for (unsigned i = 0, e = ValueCols.size(); i < e; i++) {
+      ListInit *ColumnI = ValueCols[i];
+      for (unsigned j = 0, ColSize = ColumnI->getSize(); j < ColSize; j++) {
+        std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
+        OS << "  if (in" << ColName;
+        OS << " == ";
+        OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString();
+        if (j < ColumnI->getSize() - 1) OS << " && ";
+        else OS << ")\n";
+      }
+      OS << "    return " << InstrMapDesc.getName();
+      OS << "Table[mid]["<<i+1<<"];\n";
+    }
+    OS << "  return -1;";
+  }
+  else
+    OS << "  return " << InstrMapDesc.getName() << "Table[mid][1];\n";
+
+  OS <<"}\n\n";
+}
+
+//===----------------------------------------------------------------------===//
+// Emit relation tables and the functions to query them.
+//===----------------------------------------------------------------------===//
+
+void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
+
+  // Emit the function name and the input parameters: mostly the opcode value
+  // of the current instruction. However, if a table has multiple columns
+  // (more than two, since the first column is used for the key instructions),
+  // then we also need to pass another input to indicate the column to be
+  // selected.
+
+  ListInit *ColFields = InstrMapDesc.getColFields();
+  const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols();
+  OS << "// "<< InstrMapDesc.getName() << "\n";
+  OS << "int "<< InstrMapDesc.getName() << "(uint16_t Opcode";
+  if (ValueCols.size() > 1) {
+    for (unsigned i = 0, e = ColFields->getSize(); i < e; i++) {
+      std::string ColName = ColFields->getElement(i)->getAsUnquotedString();
+      OS << ", enum " << ColName << " in" << ColName << ") {\n";
+    }
+  } else { OS << ") {\n"; }
+
+  // Emit map table.
+  unsigned TableSize = emitBinSearchTable(OS);
+
+  // Emit rest of the function body.
+  emitMapFuncBody(OS, TableSize);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit enums for the column fields across all the instruction maps.
+//===----------------------------------------------------------------------===//
+
+static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
+
+  std::vector<Record*> InstrMapVec;
+  InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping");
+  std::map<std::string, std::vector<Init*> > ColFieldValueMap;
+
+  // Iterate over all InstrMapping records and create a map between column
+  // fields and their possible values across all records.
+  for (unsigned i = 0, e = InstrMapVec.size(); i < e; i++) {
+    Record *CurMap = InstrMapVec[i];
+    ListInit *ColFields;
+    ColFields = CurMap->getValueAsListInit("ColFields");
+    ListInit *List = CurMap->getValueAsListInit("ValueCols");
+    std::vector<ListInit*> ValueCols;
+    unsigned ListSize = List->getSize();
+
+    for (unsigned j = 0; j < ListSize; j++) {
+      ListInit *ListJ = dyn_cast<ListInit>(List->getElement(j));
+
+      if (ListJ->getSize() != ColFields->getSize())
+        PrintFatalError("Record `" + CurMap->getName() + "', field "
+          "`ValueCols' entries don't match with the entries in 'ColFields' !");
+      ValueCols.push_back(ListJ);
+    }
+
+    for (unsigned j = 0, endCF = ColFields->getSize(); j < endCF; j++) {
+      for (unsigned k = 0; k < ListSize; k++){
+        std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
+        ColFieldValueMap[ColName].push_back((ValueCols[k])->getElement(j));
+      }
+    }
+  }
+
+  for (std::map<std::string, std::vector<Init*> >::iterator
+       II = ColFieldValueMap.begin(), IE = ColFieldValueMap.end();
+       II != IE; II++) {
+    std::vector<Init*> FieldValues = (*II).second;
+
+    // Delete duplicate entries from ColFieldValueMap.  The vector shrinks as
+    // duplicates are erased, so re-read its size on every iteration and back
+    // up 'j' after an erase so the shifted element is not skipped.
+    for (unsigned i = 0; i + 1 < FieldValues.size(); i++) {
+      Init *CurVal = FieldValues[i];
+      for (unsigned j = i+1; j < FieldValues.size(); j++) {
+        if (CurVal == FieldValues[j]) {
+          FieldValues.erase(FieldValues.begin()+j);
+          --j;
+        }
+      }
+    }
+
+    // Emit enumerated values for the column fields.
+    unsigned FieldSize = FieldValues.size();
+    OS << "enum " << (*II).first << " {\n";
+    for (unsigned i = 0; i < FieldSize; i++) {
+      OS << "\t" << (*II).first << "_" << FieldValues[i]->getAsUnquotedString();
+      if (i != FieldSize - 1)
+        OS << ",\n";
+      else
+        OS << "\n};\n\n";
+    }
+  }
+}
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+// Parse 'InstrMapping' records and use the information to form relationships
+// between instructions. These relations are emitted as tables along with the
+// functions to query them.
+//===----------------------------------------------------------------------===//
+void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) {
+  CodeGenTarget Target(Records);
+  std::string TargetName = Target.getName();
+  std::vector<Record*> InstrMapVec;
+  InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping");
+
+  if (!InstrMapVec.size())
+    return;
+
+  OS << "#ifdef GET_INSTRMAP_INFO\n";
+  OS << "#undef GET_INSTRMAP_INFO\n";
+  OS << "namespace llvm {\n\n";
+  OS << "namespace " << TargetName << " {\n\n";
+
+  // Emit column field names and their values as enums.
+  emitEnums(OS, Records);
+
+  // Iterate over all instruction mapping records and construct relationship
+  // maps based on the information specified there.
+  //
+  for (unsigned i = 0, e = InstrMapVec.size(); i < e; i++) {
+    MapTableEmitter IMap(Target, Records, InstrMapVec[i]);
+
+    // Build RowInstrMap to group instructions based on their values for
+    // RowFields. In the process, also collect key instructions into
+    // KeyInstrVec.
+    IMap.buildRowInstrMap();
+
+    // Build MapTable to map key instructions with the corresponding column
+    // instructions.
+    IMap.buildMapTable();
+
+    // Emit map tables and the functions to query them.
+    IMap.emitTablesWithFunc(OS);
+  }
+  OS << "} // End " << TargetName << " namespace\n";
+  OS << "} // End llvm namespace\n";
+  OS << "#endif // GET_INSTRMAP_INFO\n\n";
+}
+
+} // End llvm namespace
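For orientation, this is roughly the XXXGenInstrInfo.inc fragment the emitter above produces for the getPredOpcode example from the file header. The target and opcode names (Hexagon, ADD_rr, and friends) are invented for illustration, and the elided rows and binary-search body follow emitBinSearchTable/emitBinSearch verbatim:

    #ifdef GET_INSTRMAP_INFO
    #undef GET_INSTRMAP_INFO
    namespace llvm {
    namespace Hexagon {

    enum PredSense {
        PredSense_none,
        PredSense_true,
        PredSense_false
    };

    // getPredOpcode
    int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense) {
    static const uint16_t getPredOpcodeTable[][3] = {
      { Hexagon::ADD_rr, Hexagon::ADD_rr_cPt, Hexagon::ADD_rr_cNotPt },
      // ... one row per key instruction that has at least one related opcode ...
    };
      // ... binary search for Opcode in column 0; return -1 if it is absent ...
      if (inPredSense == PredSense_true)
        return getPredOpcodeTable[mid][1];
      if (inPredSense == PredSense_false)
        return getPredOpcodeTable[mid][2];
      return -1;
    }

    } // End Hexagon namespace
    } // End llvm namespace
    #endif // GET_INSTRMAP_INFO

A lookup then reads as getPredOpcode(Hexagon::ADD_rr, PredSense_true), returning the predicated-true opcode, or -1 when no related instruction exists.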

Modified: llvm/branches/R600/utils/TableGen/CodeGenRegisters.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenRegisters.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenRegisters.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenRegisters.cpp Tue Nov 13 09:21:47 2012
@@ -54,19 +54,20 @@
   std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
   if (!Comps.empty()) {
     if (Comps.size() != 2)
-      throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
+      PrintFatalError(TheDef->getLoc(),
+                      "ComposedOf must have exactly two entries");
     CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
     CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
     CodeGenSubRegIndex *X = A->addComposite(B, this);
     if (X)
-      throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
+      PrintFatalError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
   }
 
   std::vector<Record*> Parts =
     TheDef->getValueAsListOfDefs("CoveringSubRegIndices");
   if (!Parts.empty()) {
     if (Parts.size() < 2)
-      throw TGError(TheDef->getLoc(),
+      PrintFatalError(TheDef->getLoc(),
                     "CoveredBySubRegs must have two or more entries");
     SmallVector<CodeGenSubRegIndex*, 8> IdxParts;
     for (unsigned i = 0, e = Parts.size(); i != e; ++i)
@@ -112,8 +113,8 @@
   std::vector<Record*> SRs = TheDef->getValueAsListOfDefs("SubRegs");
 
   if (SRIs.size() != SRs.size())
-    throw TGError(TheDef->getLoc(),
-                  "SubRegs and SubRegIndices must have the same size");
+    PrintFatalError(TheDef->getLoc(),
+                    "SubRegs and SubRegIndices must have the same size");
 
   for (unsigned i = 0, e = SRIs.size(); i != e; ++i) {
     ExplicitSubRegIndices.push_back(RegBank.getSubRegIdx(SRIs[i]));
@@ -224,8 +225,8 @@
     CodeGenRegister *SR = ExplicitSubRegs[i];
     CodeGenSubRegIndex *Idx = ExplicitSubRegIndices[i];
     if (!SubRegs.insert(std::make_pair(Idx, SR)).second)
-      throw TGError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() +
-                    " appears twice in Register " + getName());
+      PrintFatalError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() +
+                      " appears twice in Register " + getName());
     // Map explicit sub-registers first, so the names take precedence.
     // The inherited sub-registers are mapped below.
     SubReg2Idx.insert(std::make_pair(SR, Idx));
@@ -308,8 +309,8 @@
       ArrayRef<SMLoc> Loc;
       if (TheDef)
         Loc = TheDef->getLoc();
-      throw TGError(Loc, "Register " + getName() +
-                    " has itself as a sub-register");
+      PrintFatalError(Loc, "Register " + getName() +
+                      " has itself as a sub-register");
     }
     // Ensure that every sub-register has a unique name.
     DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins =
@@ -320,7 +321,7 @@
     ArrayRef<SMLoc> Loc;
     if (TheDef)
       Loc = TheDef->getLoc();
-    throw TGError(Loc, "Sub-register can't have two names: " +
+    PrintFatalError(Loc, "Sub-register can't have two names: " +
                   SI->second->getName() + " available as " +
                   SI->first->getName() + " and " + Ins->second->getName());
   }
@@ -467,8 +468,8 @@
            SE = NewSubReg->SubRegs.end(); SI != SE; ++SI) {
       CodeGenSubRegIndex *SubIdx = getSubRegIndex(SI->second);
       if (!SubIdx)
-        throw TGError(TheDef->getLoc(), "No SubRegIndex for " +
-                      SI->second->getName() + " in " + getName());
+        PrintFatalError(TheDef->getLoc(), "No SubRegIndex for " +
+                        SI->second->getName() + " in " + getName());
       NewIdx->addComposite(SI->first, SubIdx);
     }
   }
@@ -592,15 +593,16 @@
     unsigned Dim = Indices.size();
     ListInit *SubRegs = Def->getValueAsListInit("SubRegs");
     if (Dim != SubRegs->getSize())
-      throw TGError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch");
+      PrintFatalError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch");
     if (Dim < 2)
-      throw TGError(Def->getLoc(), "Tuples must have at least 2 sub-registers");
+      PrintFatalError(Def->getLoc(),
+                      "Tuples must have at least 2 sub-registers");
 
     // Evaluate the sub-register lists to be zipped.
     unsigned Length = ~0u;
     SmallVector<SetTheory::RecSet, 4> Lists(Dim);
     for (unsigned i = 0; i != Dim; ++i) {
-      ST.evaluate(SubRegs->getElement(i), Lists[i]);
+      ST.evaluate(SubRegs->getElement(i), Lists[i], Def->getLoc());
       Length = std::min(Length, unsigned(Lists[i].size()));
     }
 
@@ -706,8 +708,8 @@
   for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
     Record *Type = TypeList[i];
     if (!Type->isSubClassOf("ValueType"))
-      throw "RegTypes list member '" + Type->getName() +
-        "' does not derive from the ValueType class!";
+      PrintFatalError("RegTypes list member '" + Type->getName() +
+        "' does not derive from the ValueType class!");
     VTs.push_back(getValueType(Type));
   }
   assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!");
@@ -728,14 +730,14 @@
   // Alternative allocation orders may be subsets.
   SetTheory::RecSet Order;
   for (unsigned i = 0, e = AltOrders->size(); i != e; ++i) {
-    RegBank.getSets().evaluate(AltOrders->getElement(i), Order);
+    RegBank.getSets().evaluate(AltOrders->getElement(i), Order, R->getLoc());
     Orders[1 + i].append(Order.begin(), Order.end());
     // Verify that all altorder members are regclass members.
     while (!Order.empty()) {
       CodeGenRegister *Reg = RegBank.getReg(Order.back());
       Order.pop_back();
       if (!contains(Reg))
-        throw TGError(R->getLoc(), " AltOrder register " + Reg->getName() +
+        PrintFatalError(R->getLoc(), " AltOrder register " + Reg->getName() +
                       " is not a class member");
     }
   }
@@ -1021,7 +1023,7 @@
   // Read in register class definitions.
   std::vector<Record*> RCs = Records.getAllDerivedDefinitions("RegisterClass");
   if (RCs.empty())
-    throw std::string("No 'RegisterClass' subclasses defined!");
+    PrintFatalError(std::string("No 'RegisterClass' subclasses defined!"));
 
   // Allocate user-defined register classes.
   RegClasses.reserve(RCs.size());
@@ -1098,7 +1100,7 @@
   if (CodeGenRegisterClass *RC = Def2RC[Def])
     return RC;
 
-  throw TGError(Def->getLoc(), "Not a known RegisterClass!");
+  PrintFatalError(Def->getLoc(), "Not a known RegisterClass!");
 }
 
 CodeGenSubRegIndex*

Modified: llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp Tue Nov 13 09:21:47 2012
@@ -38,8 +38,9 @@
 
 // (instrs a, b, ...) Evaluate and union all arguments. Identical to AddOp.
 struct InstrsOp : public SetTheory::Operator {
-  void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts) {
-    ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts);
+  void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
+             ArrayRef<SMLoc> Loc) {
+    ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc);
   }
 };
 
@@ -55,13 +56,15 @@
   const CodeGenTarget &Target;
   InstRegexOp(const CodeGenTarget &t): Target(t) {}
 
-  void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts) {
+  void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
+             ArrayRef<SMLoc> Loc) {
     SmallVector<Regex*, 4> RegexList;
     for (DagInit::const_arg_iterator
            AI = Expr->arg_begin(), AE = Expr->arg_end(); AI != AE; ++AI) {
       StringInit *SI = dyn_cast<StringInit>(*AI);
       if (!SI)
-        throw "instregex requires pattern string: " + Expr->getAsString();
+        PrintFatalError(Loc, "instregex requires pattern string: "
+          + Expr->getAsString());
       std::string pat = SI->getValue();
       // Implement a python-style prefix match.
       if (pat[0] != '^') {
@@ -265,13 +268,13 @@
     Record *AliasDef = (*AI)->getValueAsDef("AliasRW");
     if (MatchDef->isSubClassOf("SchedWrite")) {
       if (!AliasDef->isSubClassOf("SchedWrite"))
-        throw TGError((*AI)->getLoc(), "SchedWrite Alias must be SchedWrite");
+        PrintFatalError((*AI)->getLoc(), "SchedWrite Alias must be SchedWrite");
       scanSchedRW(AliasDef, SWDefs, RWSet);
     }
     else {
       assert(MatchDef->isSubClassOf("SchedRead") && "Unknown SchedReadWrite");
       if (!AliasDef->isSubClassOf("SchedRead"))
-        throw TGError((*AI)->getLoc(), "SchedRead Alias must be SchedRead");
+        PrintFatalError((*AI)->getLoc(), "SchedRead Alias must be SchedRead");
       scanSchedRW(AliasDef, SRDefs, RWSet);
     }
   }
@@ -302,7 +305,7 @@
     Record *MatchDef = (*AI)->getValueAsDef("MatchRW");
     CodeGenSchedRW &RW = getSchedRW(MatchDef);
     if (RW.IsAlias)
-      throw TGError((*AI)->getLoc(), "Cannot Alias an Alias");
+      PrintFatalError((*AI)->getLoc(), "Cannot Alias an Alias");
     RW.Aliases.push_back(*AI);
   }
   DEBUG(
@@ -434,9 +437,9 @@
         continue;
     }
     if (AliasDef)
-      throw TGError(AliasRW.TheDef->getLoc(), "Multiple aliases "
-                    "defined for processor " + ProcModel.ModelName +
-                    " Ensure only one SchedAlias exists per RW.");
+      PrintFatalError(AliasRW.TheDef->getLoc(), "Multiple aliases "
+                      "defined for processor " + ProcModel.ModelName +
+                      " Ensure only one SchedAlias exists per RW.");
     AliasDef = AliasRW.TheDef;
   }
   if (AliasDef) {
@@ -703,7 +706,7 @@
   // Sort Instrs into sets.
   const RecVec *InstDefs = Sets.expand(InstRWDef);
   if (InstDefs->empty())
-    throw TGError(InstRWDef->getLoc(), "No matching instruction opcodes");
+    PrintFatalError(InstRWDef->getLoc(), "No matching instruction opcodes");
 
   for (RecIter I = InstDefs->begin(), E = InstDefs->end(); I != E; ++I) {
     unsigned SCIdx = 0;
@@ -763,7 +766,7 @@
         for (RecIter RI = SchedClasses[OldSCIdx].InstRWs.begin(),
                RE = SchedClasses[OldSCIdx].InstRWs.end(); RI != RE; ++RI) {
           if ((*RI)->getValueAsDef("SchedModel") == RWModelDef) {
-            throw TGError(InstRWDef->getLoc(), "Overlapping InstRW def " +
+            PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " +
                           (*II)->getName() + " also matches " +
                           (*RI)->getValue("Instrs")->getValue()->getAsString());
           }
@@ -822,11 +825,11 @@
   std::sort(ItinRWDefs.begin(), ItinRWDefs.end(), LessRecord());
   for (RecIter II = ItinRWDefs.begin(), IE = ItinRWDefs.end(); II != IE; ++II) {
     if (!(*II)->getValueInit("SchedModel")->isComplete())
-      throw TGError((*II)->getLoc(), "SchedModel is undefined");
+      PrintFatalError((*II)->getLoc(), "SchedModel is undefined");
     Record *ModelDef = (*II)->getValueAsDef("SchedModel");
     ProcModelMapTy::const_iterator I = ProcModelMap.find(ModelDef);
     if (I == ProcModelMap.end()) {
-      throw TGError((*II)->getLoc(), "Undefined SchedMachineModel "
+      PrintFatalError((*II)->getLoc(), "Undefined SchedMachineModel "
                     + ModelDef->getName());
     }
     ProcModels[I->second].ItinRWDefs.push_back(*II);
@@ -864,7 +867,7 @@
       if (!std::count(Matched.begin(), Matched.end(), ItinClassDef))
         continue;
       if (HasMatch)
-        throw TGError((*II)->getLoc(), "Duplicate itinerary class "
+        PrintFatalError((*II)->getLoc(), "Duplicate itinerary class "
                       + ItinClassDef->getName()
                       + " in ItinResources for " + PM.ModelName);
       HasMatch = true;
@@ -1092,9 +1095,10 @@
       if (Cnt > 1) {
         const CodeGenProcModel &PM =
           *(SchedModels.procModelBegin() + Variant.ProcIdx);
-        throw TGError(Variant.VarOrSeqDef->getLoc(),
-                      "Multiple variants defined for processor " + PM.ModelName +
-                      " Ensure only one SchedAlias exists per RW.");
+        PrintFatalError(Variant.VarOrSeqDef->getLoc(),
+                        "Multiple variants defined for processor " +
+                        PM.ModelName +
+                        " Ensure only one SchedAlias exists per RW.");
       }
     }
     if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) {
@@ -1212,8 +1216,9 @@
       std::vector<TransVariant> IntersectingVariants;
       getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
       if (IntersectingVariants.empty())
-        throw TGError(SchedRW.TheDef->getLoc(), "No variant of this type has a "
-                      "matching predicate on any processor ");
+        PrintFatalError(SchedRW.TheDef->getLoc(),
+                      "No variant of this type has "
+                      "a matching predicate on any processor");
       // Now expand each variant on top of its copy of the transition.
       for (std::vector<TransVariant>::const_iterator
              IVI = IntersectingVariants.begin(),
@@ -1437,9 +1442,9 @@
       if (!std::count(Matched.begin(), Matched.end(), ItinClassDef))
         continue;
       if (HasMatch)
-        throw TGError((*II)->getLoc(), "Duplicate itinerary class "
-                      + ItinClassDef->getName()
-                      + " in ItinResources for " + PM.ModelName);
+        PrintFatalError((*II)->getLoc(), "Duplicate itinerary class "
+                        + ItinClassDef->getName()
+                        + " in ItinResources for " + PM.ModelName);
       HasMatch = true;
       IdxVec Writes, Reads;
       findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
@@ -1516,17 +1521,17 @@
     if ((*RI)->getValueAsDef("Kind") == ProcResKind
         && (*RI)->getValueAsDef("SchedModel") == PM.ModelDef) {
       if (ProcUnitDef) {
-        throw TGError((*RI)->getLoc(),
-                      "Multiple ProcessorResourceUnits associated with "
-                      + ProcResKind->getName());
+        PrintFatalError((*RI)->getLoc(),
+                        "Multiple ProcessorResourceUnits associated with "
+                        + ProcResKind->getName());
       }
       ProcUnitDef = *RI;
     }
   }
   if (!ProcUnitDef) {
-    throw TGError(ProcResKind->getLoc(),
-                  "No ProcessorResources associated with "
-                  + ProcResKind->getName());
+    PrintFatalError(ProcResKind->getLoc(),
+                    "No ProcessorResources associated with "
+                    + ProcResKind->getName());
   }
   return ProcUnitDef;
 }
@@ -1583,10 +1588,10 @@
   RecIter PRPos = std::find(ProcResourceDefs.begin(), ProcResourceDefs.end(),
                             PRDef);
   if (PRPos == ProcResourceDefs.end())
-    throw TGError(PRDef->getLoc(), "ProcResource def is not included in "
-                  "the ProcResources list for " + ModelName);
+    PrintFatalError(PRDef->getLoc(), "ProcResource def is not included in "
+                    "the ProcResources list for " + ModelName);
   // Idx=0 is reserved for invalid.
-  return 1 + PRPos - ProcResourceDefs.begin();
+  return 1 + (PRPos - ProcResourceDefs.begin());
 }
 
 #ifndef NDEBUG

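The change repeated throughout these TableGen backends is mechanical: error paths that previously did "throw TGError(...)" or threw a bare std::string now call PrintFatalError from llvm/TableGen/Error.h, which prints the diagnostic (with the record's source location when one is supplied) and terminates llvm-tblgen, so the tool no longer needs exception support (see the REQUIRES_EH removal in the Makefile further down). A minimal sketch of the new pattern; checkWriteDef is an invented helper used only for illustration:

    // Sketch of the error-reporting style this patch adopts; checkWriteDef is
    // not part of the patch.
    #include "llvm/TableGen/Error.h"
    #include "llvm/TableGen/Record.h"
    using namespace llvm;

    static void checkWriteDef(Record *WriteDef) {
      // PrintFatalError reports the message, pointing at the record's
      // definition when a location is passed, and then exits the tool.
      if (!WriteDef->isSubClassOf("SchedWrite"))
        PrintFatalError(WriteDef->getLoc(),
                        "expected a SchedWrite definition: " +
                        WriteDef->getName());
    }
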
Modified: llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp Tue Nov 13 09:21:47 2012
@@ -10,13 +10,14 @@
 // This class wraps target description classes used by the various code
 // generation TableGen backends.  This makes it easier to access the data and
 // provides a single place that needs to check it for validity.  All of these
-// classes throw exceptions on error conditions.
+// classes abort on error conditions.
 //
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenTarget.h"
 #include "CodeGenIntrinsics.h"
 #include "CodeGenSchedule.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
@@ -124,9 +125,9 @@
   : Records(records), RegBank(0), SchedModels(0) {
   std::vector<Record*> Targets = Records.getAllDerivedDefinitions("Target");
   if (Targets.size() == 0)
-    throw std::string("ERROR: No 'Target' subclasses defined!");
+    PrintFatalError("ERROR: No 'Target' subclasses defined!");
   if (Targets.size() != 1)
-    throw std::string("ERROR: Multiple subclasses of Target defined!");
+    PrintFatalError("ERROR: Multiple subclasses of Target defined!");
   TargetRec = Targets[0];
 }
 
@@ -160,7 +161,7 @@
 Record *CodeGenTarget::getAsmParser() const {
   std::vector<Record*> LI = TargetRec->getValueAsListOfDefs("AssemblyParsers");
   if (AsmParserNum >= LI.size())
-    throw "Target does not have an AsmParser #" + utostr(AsmParserNum) + "!";
+    PrintFatalError("Target does not have an AsmParser #" + utostr(AsmParserNum) + "!");
   return LI[AsmParserNum];
 }
 
@@ -171,7 +172,7 @@
   std::vector<Record*> LI =
     TargetRec->getValueAsListOfDefs("AssemblyParserVariants");
   if (i >= LI.size())
-    throw "Target does not have an AsmParserVariant #" + utostr(i) + "!";
+    PrintFatalError("Target does not have an AsmParserVariant #" + utostr(i) + "!");
   return LI[i];
 }
 
@@ -189,7 +190,7 @@
 Record *CodeGenTarget::getAsmWriter() const {
   std::vector<Record*> LI = TargetRec->getValueAsListOfDefs("AssemblyWriters");
   if (AsmWriterNum >= LI.size())
-    throw "Target does not have an AsmWriter #" + utostr(AsmWriterNum) + "!";
+    PrintFatalError("Target does not have an AsmWriter #" + utostr(AsmWriterNum) + "!");
   return LI[AsmWriterNum];
 }
 
@@ -256,7 +257,7 @@
 void CodeGenTarget::ReadInstructions() const {
   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
   if (Insts.size() <= 2)
-    throw std::string("No 'Instruction' subclasses defined!");
+    PrintFatalError("No 'Instruction' subclasses defined!");
 
   // Parse the instructions defined in the .td file.
   for (unsigned i = 0, e = Insts.size(); i != e; ++i)
@@ -272,7 +273,7 @@
   DenseMap<const Record*, CodeGenInstruction*>::const_iterator
     I = Insts.find(Rec);
   if (Rec == 0 || I == Insts.end())
-    throw std::string("Could not find '") + Name + "' instruction!";
+    PrintFatalError(std::string("Could not find '") + Name + "' instruction!");
   return I->second;
 }
 
@@ -419,7 +420,7 @@
 
   if (DefName.size() <= 4 ||
       std::string(DefName.begin(), DefName.begin() + 4) != "int_")
-    throw "Intrinsic '" + DefName + "' does not start with 'int_'!";
+    PrintFatalError("Intrinsic '" + DefName + "' does not start with 'int_'!");
 
   EnumName = std::string(DefName.begin()+4, DefName.end());
 
@@ -439,7 +440,7 @@
     // Verify it starts with "llvm.".
     if (Name.size() <= 5 ||
         std::string(Name.begin(), Name.begin() + 5) != "llvm.")
-      throw "Intrinsic '" + DefName + "'s name does not start with 'llvm.'!";
+      PrintFatalError("Intrinsic '" + DefName + "'s name does not start with 'llvm.'!");
   }
 
   // If TargetPrefix is specified, make sure that Name starts with
@@ -448,8 +449,8 @@
     if (Name.size() < 6+TargetPrefix.size() ||
         std::string(Name.begin() + 5, Name.begin() + 6 + TargetPrefix.size())
         != (TargetPrefix + "."))
-      throw "Intrinsic '" + DefName + "' does not start with 'llvm." +
-        TargetPrefix + ".'!";
+      PrintFatalError("Intrinsic '" + DefName + "' does not start with 'llvm." +
+        TargetPrefix + ".'!");
   }
 
   // Parse the list of return types.
@@ -481,7 +482,7 @@
 
     // Reject invalid types.
     if (VT == MVT::isVoid)
-      throw "Intrinsic '" + DefName + " has void in result type list!";
+      PrintFatalError("Intrinsic '" + DefName + " has void in result type list!");
 
     IS.RetVTs.push_back(VT);
     IS.RetTypeDefs.push_back(TyEl);
@@ -515,7 +516,7 @@
 
     // Reject invalid types.
     if (VT == MVT::isVoid && i != e-1 /*void at end means varargs*/)
-      throw "Intrinsic '" + DefName + " has void in result type list!";
+      PrintFatalError("Intrinsic '" + DefName + " has void in result type list!");
 
     IS.ParamVTs.push_back(VT);
     IS.ParamTypeDefs.push_back(TyEl);

Modified: llvm/branches/R600/utils/TableGen/CodeGenTarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenTarget.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenTarget.h (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenTarget.h Tue Nov 13 09:21:47 2012
@@ -9,8 +9,8 @@
 //
 // This file defines wrappers for the Target class and related global
 // functionality.  This makes it easier to access the data and provides a single
-// place that needs to check it for validity.  All of these classes throw
-// exceptions on error conditions.
+// place that needs to check it for validity.  All of these classes abort
+// on error conditions.
 //
 //===----------------------------------------------------------------------===//
 

Modified: llvm/branches/R600/utils/TableGen/DAGISelMatcherGen.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/DAGISelMatcherGen.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/DAGISelMatcherGen.cpp (original)
+++ llvm/branches/R600/utils/TableGen/DAGISelMatcherGen.cpp Tue Nov 13 09:21:47 2012
@@ -10,6 +10,7 @@
 #include "DAGISelMatcher.h"
 #include "CodeGenDAGPatterns.h"
 #include "CodeGenRegisters.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
@@ -172,15 +173,10 @@
   // diagnostics, which we know are impossible at this point.
   TreePattern &TP = *CGP.pf_begin()->second;
 
-  try {
-    bool MadeChange = true;
-    while (MadeChange)
-      MadeChange = PatWithNoTypes->ApplyTypeConstraints(TP,
-                                                true/*Ignore reg constraints*/);
-  } catch (...) {
-    errs() << "Type constraint application shouldn't fail!";
-    abort();
-  }
+  bool MadeChange = true;
+  while (MadeChange)
+    MadeChange = PatWithNoTypes->ApplyTypeConstraints(TP,
+                                              true/*Ignore reg constraints*/);
 }
 
 
@@ -876,7 +872,7 @@
   if (OpRec->isSubClassOf("SDNodeXForm"))
     return EmitResultSDNodeXFormAsOperand(N, ResultOps);
   errs() << "Unknown result node to emit code for: " << *N << '\n';
-  throw std::string("Unknown node in result pattern!");
+  PrintFatalError("Unknown node in result pattern!");
 }
 
 void MatcherGen::EmitResultCode() {

Modified: llvm/branches/R600/utils/TableGen/DisassemblerEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/DisassemblerEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/DisassemblerEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/DisassemblerEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -117,11 +117,9 @@
     for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
       RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
 
-    // FIXME: As long as we are using exceptions, might as well drop this to the
-    // actual conflict site.
     if (Tables.hasConflicts())
-      throw TGError(Target.getTargetRecord()->getLoc(),
-                    "Primary decode conflict");
+      PrintFatalError(Target.getTargetRecord()->getLoc(),
+                      "Primary decode conflict");
 
     Tables.emit(OS);
     return;

Modified: llvm/branches/R600/utils/TableGen/EDEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/EDEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/EDEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/EDEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -19,6 +19,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include <string>
@@ -777,7 +778,7 @@
       errs() << "Operand type: " << rec.getName() << '\n';
       errs() << "Operand name: " << operandInfo.Name << '\n';
       errs() << "Instruction name: " << inst.TheDef->getName() << '\n';
-      throw("Unhandled type in EDEmitter");
+      PrintFatalError("Unhandled type in EDEmitter");
     }
   }
 }

Modified: llvm/branches/R600/utils/TableGen/FastISelEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/FastISelEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/FastISelEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/FastISelEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -549,7 +549,7 @@
     };
     
     if (SimplePatterns[Operands][OpcodeName][VT][RetVT].count(PredicateCheck))
-      throw TGError(Pattern.getSrcRecord()->getLoc(),
+      PrintFatalError(Pattern.getSrcRecord()->getLoc(),
                     "Duplicate record in FastISel table!");
 
     SimplePatterns[Operands][OpcodeName][VT][RetVT][PredicateCheck] = Memo;

Modified: llvm/branches/R600/utils/TableGen/FixedLenDecoderEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/FixedLenDecoderEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/FixedLenDecoderEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/FixedLenDecoderEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "decoder-emitter"
 
 #include "CodeGenTarget.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallString.h"
@@ -741,7 +742,7 @@
 
     switch (*I) {
     default:
-      throw "invalid decode table opcode";
+      PrintFatalError("invalid decode table opcode");
     case MCD::OPC_ExtractField: {
       ++I;
       unsigned Start = *I++;

Modified: llvm/branches/R600/utils/TableGen/InstrInfoEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/InstrInfoEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/InstrInfoEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/InstrInfoEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -16,8 +16,10 @@
 #include "CodeGenDAGPatterns.h"
 #include "CodeGenSchedule.h"
 #include "CodeGenTarget.h"
+#include "TableGenBackends.h"
 #include "SequenceToOffsetTable.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include <algorithm>
@@ -342,13 +344,14 @@
 
   // Emit all of the target-specific flags...
   BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
-  if (!TSF) throw "no TSFlags?";
+  if (!TSF)
+    PrintFatalError("no TSFlags?");
   uint64_t Value = 0;
   for (unsigned i = 0, e = TSF->getNumBits(); i != e; ++i) {
     if (BitInit *Bit = dyn_cast<BitInit>(TSF->getBit(i)))
       Value |= uint64_t(Bit->getValue()) << i;
     else
-      throw "Invalid TSFlags bit in " + Inst.TheDef->getName();
+      PrintFatalError("Invalid TSFlags bit in " + Inst.TheDef->getName());
   }
   OS << ", 0x";
   OS.write_hex(Value);
@@ -415,6 +418,7 @@
 
 void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) {
   InstrInfoEmitter(RK).run(OS);
+  EmitMapTable(RK, OS);
 }
 
 } // End llvm namespace

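Besides the exception removal, the InstrInfo backend gains a second emitter: EmitInstrInfo now also calls EmitMapTable (declared in TableGenBackends.h below), so the instruction mapping tables land in the same generated .inc file. A rough sketch of this style of entry point; the names EmitMyBackend, emitMyTable and GET_MY_TABLE are invented for illustration:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"

    namespace llvm {

    // Each sub-emitter wraps its output in its own #ifdef guard, so one
    // generated .inc file can be included for several purposes.
    static void emitMyTable(RecordKeeper &RK, raw_ostream &OS) {
      OS << "#ifdef GET_MY_TABLE\n";
      OS << "// ... generated rows ...\n";
      OS << "#endif // GET_MY_TABLE\n";
    }

    void EmitMyBackend(RecordKeeper &RK, raw_ostream &OS) {
      emitMyTable(RK, OS);
    }

    } // End llvm namespace
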
Modified: llvm/branches/R600/utils/TableGen/IntrinsicEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/IntrinsicEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/IntrinsicEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/IntrinsicEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -15,6 +15,7 @@
 #include "CodeGenTarget.h"
 #include "SequenceToOffsetTable.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/StringMatcher.h"
 #include "llvm/TableGen/TableGenBackend.h"
@@ -249,7 +250,7 @@
   if (EVT(VT).isInteger()) {
     unsigned BitWidth = EVT(VT).getSizeInBits();
     switch (BitWidth) {
-    default: throw "unhandled integer type width in intrinsic!";
+    default: PrintFatalError("unhandled integer type width in intrinsic!");
     case 1: return Sig.push_back(IIT_I1);
     case 8: return Sig.push_back(IIT_I8);
     case 16: return Sig.push_back(IIT_I16);
@@ -259,7 +260,7 @@
   }
   
   switch (VT) {
-  default: throw "unhandled MVT in intrinsic!";
+  default: PrintFatalError("unhandled MVT in intrinsic!");
   case MVT::f32: return Sig.push_back(IIT_F32);
   case MVT::f64: return Sig.push_back(IIT_F64);
   case MVT::Metadata: return Sig.push_back(IIT_METADATA);
@@ -328,7 +329,7 @@
   if (EVT(VT).isVector()) {
     EVT VVT = VT;
     switch (VVT.getVectorNumElements()) {
-    default: throw "unhandled vector type width in intrinsic!";
+    default: PrintFatalError("unhandled vector type width in intrinsic!");
     case 2: Sig.push_back(IIT_V2); break;
     case 4: Sig.push_back(IIT_V4); break;
     case 8: Sig.push_back(IIT_V8); break;
@@ -692,8 +693,8 @@
       
       if (!BIM.insert(std::make_pair(Ints[i].GCCBuiltinName,
                                      Ints[i].EnumName)).second)
-        throw "Intrinsic '" + Ints[i].TheDef->getName() +
-              "': duplicate GCC builtin name!";
+        PrintFatalError("Intrinsic '" + Ints[i].TheDef->getName() +
+              "': duplicate GCC builtin name!");
     }
   }
   

Modified: llvm/branches/R600/utils/TableGen/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/Makefile?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/Makefile (original)
+++ llvm/branches/R600/utils/TableGen/Makefile Tue Nov 13 09:21:47 2012
@@ -10,7 +10,6 @@
 LEVEL = ../..
 TOOLNAME = llvm-tblgen
 USEDLIBS = LLVMTableGen.a LLVMSupport.a
-REQUIRES_EH := 1
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1

Modified: llvm/branches/R600/utils/TableGen/PseudoLoweringEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/PseudoLoweringEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/PseudoLoweringEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/PseudoLoweringEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -90,7 +90,7 @@
       // FIXME: We probably shouldn't ever get a non-zero BaseIdx here.
       assert(BaseIdx == 0 && "Named subargument in pseudo expansion?!");
       if (DI->getDef() != Insn.Operands[BaseIdx + i].Rec)
-        throw TGError(Rec->getLoc(),
+        PrintFatalError(Rec->getLoc(),
                       "Pseudo operand type '" + DI->getDef()->getName() +
                       "' does not match expansion operand type '" +
                       Insn.Operands[BaseIdx + i].Rec->getName() + "'");
@@ -129,22 +129,22 @@
 
   DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator());
   if (!OpDef)
-    throw TGError(Rec->getLoc(), Rec->getName() +
+    PrintFatalError(Rec->getLoc(), Rec->getName() +
                   " has unexpected operator type!");
   Record *Operator = OpDef->getDef();
   if (!Operator->isSubClassOf("Instruction"))
-    throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
-                                 "' is not an instruction!");
+    PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
+                    "' is not an instruction!");
 
   CodeGenInstruction Insn(Operator);
 
   if (Insn.isCodeGenOnly || Insn.isPseudo)
-    throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
-                                 "' cannot be another pseudo instruction!");
+    PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
+                    "' cannot be another pseudo instruction!");
 
   if (Insn.Operands.size() != Dag->getNumArgs())
-    throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
-                                 "' operand count mismatch");
+    PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
+                    "' operand count mismatch");
 
   unsigned NumMIOperands = 0;
   for (unsigned i = 0, e = Insn.Operands.size(); i != e; ++i)
@@ -179,9 +179,9 @@
     StringMap<unsigned>::iterator SourceOp =
       SourceOperands.find(Dag->getArgName(i));
     if (SourceOp == SourceOperands.end())
-      throw TGError(Rec->getLoc(),
-                    "Pseudo output operand '" + Dag->getArgName(i) +
-                    "' has no matching source operand.");
+      PrintFatalError(Rec->getLoc(),
+                      "Pseudo output operand '" + Dag->getArgName(i) +
+                      "' has no matching source operand.");
     // Map the source operand to the destination operand index for each
     // MachineInstr operand.
     for (unsigned I = 0, E = Insn.Operands[i].MINumOperands; I != E; ++I)

Modified: llvm/branches/R600/utils/TableGen/RegisterInfoEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/RegisterInfoEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/RegisterInfoEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/RegisterInfoEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -62,6 +62,8 @@
 
   void EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
                            const std::string &ClassName);
+  void emitComposeSubRegIndices(raw_ostream &OS, CodeGenRegBank &RegBank,
+                                const std::string &ClassName);
 };
 } // End anonymous namespace
 
@@ -530,6 +532,102 @@
   OS << Val;
 }
 
+// Try to combine Idx's compose map into Vec if it is compatible.
+// Return false if it's not possible.
+static bool combine(const CodeGenSubRegIndex *Idx,
+                    SmallVectorImpl<CodeGenSubRegIndex*> &Vec) {
+  const CodeGenSubRegIndex::CompMap &Map = Idx->getComposites();
+  for (CodeGenSubRegIndex::CompMap::const_iterator
+       I = Map.begin(), E = Map.end(); I != E; ++I) {
+    CodeGenSubRegIndex *&Entry = Vec[I->first->EnumValue - 1];
+    if (Entry && Entry != I->second)
+      return false;
+  }
+
+  // All entries are compatible. Make it so.
+  for (CodeGenSubRegIndex::CompMap::const_iterator
+       I = Map.begin(), E = Map.end(); I != E; ++I)
+    Vec[I->first->EnumValue - 1] = I->second;
+  return true;
+}
+
+static const char *getMinimalTypeForRange(uint64_t Range) {
+  assert(Range < 0xFFFFFFFFULL && "Enum too large");
+  if (Range > 0xFFFF)
+    return "uint32_t";
+  if (Range > 0xFF)
+    return "uint16_t";
+  return "uint8_t";
+}
+
+void
+RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
+                                              CodeGenRegBank &RegBank,
+                                              const std::string &ClName) {
+  ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices();
+  OS << "unsigned " << ClName
+     << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n";
+
+  // Many sub-register indexes are composition-compatible, meaning that
+  //
+  //   compose(IdxA, IdxB) == compose(IdxA', IdxB)
+  //
+  // for many IdxA, IdxA' pairs. Not all sub-register indexes can be composed.
+  // The illegal entries can be used as wildcards to compress the table further.
+
+  // Map each Sub-register index to a compatible table row.
+  SmallVector<unsigned, 4> RowMap;
+  SmallVector<SmallVector<CodeGenSubRegIndex*, 4>, 4> Rows;
+
+  for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) {
+    unsigned Found = ~0u;
+    for (unsigned r = 0, re = Rows.size(); r != re; ++r) {
+      if (combine(SubRegIndices[i], Rows[r])) {
+        Found = r;
+        break;
+      }
+    }
+    if (Found == ~0u) {
+      Found = Rows.size();
+      Rows.resize(Found + 1);
+      Rows.back().resize(SubRegIndices.size());
+      combine(SubRegIndices[i], Rows.back());
+    }
+    RowMap.push_back(Found);
+  }
+
+  // Output the row map if there are multiple rows.
+  if (Rows.size() > 1) {
+    OS << "  static const " << getMinimalTypeForRange(Rows.size())
+       << " RowMap[" << SubRegIndices.size() << "] = {\n    ";
+    for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
+      OS << RowMap[i] << ", ";
+    OS << "\n  };\n";
+  }
+
+  // Output the rows.
+  OS << "  static const " << getMinimalTypeForRange(SubRegIndices.size()+1)
+     << " Rows[" << Rows.size() << "][" << SubRegIndices.size() << "] = {\n";
+  for (unsigned r = 0, re = Rows.size(); r != re; ++r) {
+    OS << "    { ";
+    for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
+      if (Rows[r][i])
+        OS << Rows[r][i]->EnumValue << ", ";
+      else
+        OS << "0, ";
+    OS << "},\n";
+  }
+  OS << "  };\n\n";
+
+  OS << "  --IdxA; assert(IdxA < " << SubRegIndices.size() << ");\n"
+     << "  --IdxB; assert(IdxB < " << SubRegIndices.size() << ");\n";
+  if (Rows.size() > 1)
+    OS << "  return Rows[RowMap[IdxA]][IdxB];\n";
+  else
+    OS << "  return Rows[0][IdxB];\n";
+  OS << "}\n\n";
+}
+
 //
 // runMCDesc - Print out MC register descriptions.
 //
@@ -802,7 +900,8 @@
      << "  virtual bool needsStackRealignment(const MachineFunction &) const\n"
      << "     { return false; }\n";
   if (!RegBank.getSubRegIndices().empty()) {
-    OS << "  virtual unsigned composeSubRegIndices(unsigned, unsigned) const;\n"
+    OS << "  virtual unsigned composeSubRegIndicesImpl"
+       << "(unsigned, unsigned) const;\n"
       << "  virtual const TargetRegisterClass *"
       "getSubClassWithSubReg(const TargetRegisterClass*, unsigned) const;\n";
   }
@@ -1054,31 +1153,8 @@
 
   std::string ClassName = Target.getName() + "GenRegisterInfo";
 
-  // Emit composeSubRegIndices
-  if (!SubRegIndices.empty()) {
-    OS << "unsigned " << ClassName
-      << "::composeSubRegIndices(unsigned IdxA, unsigned IdxB) const {\n"
-      << "  switch (IdxA) {\n"
-      << "  default:\n    return IdxB;\n";
-    for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) {
-      bool Open = false;
-      for (unsigned j = 0; j != e; ++j) {
-        CodeGenSubRegIndex *Comp = SubRegIndices[i]->compose(SubRegIndices[j]);
-        if (Comp && Comp != SubRegIndices[j]) {
-          if (!Open) {
-            OS << "  case " << SubRegIndices[i]->getQualifiedName()
-              << ": switch(IdxB) {\n    default: return IdxB;\n";
-            Open = true;
-          }
-          OS << "    case " << SubRegIndices[j]->getQualifiedName()
-            << ": return " << Comp->getQualifiedName() << ";\n";
-        }
-      }
-      if (Open)
-        OS << "    }\n";
-    }
-    OS << "  }\n}\n\n";
-  }
+  if (!SubRegIndices.empty())
+    emitComposeSubRegIndices(OS, RegBank, ClassName);
 
   // Emit getSubClassWithSubReg.
   if (!SubRegIndices.empty()) {
@@ -1092,7 +1168,7 @@
     else if (RegisterClasses.size() < UINT16_MAX)
       OS << "  static const uint16_t Table[";
     else
-      throw "Too many register classes.";
+      PrintFatalError("Too many register classes.");
     OS << RegisterClasses.size() << "][" << SubRegIndices.size() << "] = {\n";
     for (unsigned rci = 0, rce = RegisterClasses.size(); rci != rce; ++rci) {
       const CodeGenRegisterClass &RC = *RegisterClasses[rci];

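The new emitComposeSubRegIndices above compresses the composition table: indices whose compose maps never disagree share one table row, null entries act as wildcards, and RowMap redirects each index to its row. A small stand-alone sketch of that row-merging idea, using plain integers (0 as the wildcard) instead of CodeGenSubRegIndex pointers:

    // Stand-alone sketch of the row merging used by emitComposeSubRegIndices,
    // with plain unsigned maps; a zero entry is a "don't care" wildcard.
    #include <cstdio>
    #include <vector>

    // Try to fold Map (sub-index -> composed index) into Row; refuse on conflict.
    static bool combineRow(const std::vector<unsigned> &Map,
                           std::vector<unsigned> &Row) {
      for (unsigned i = 0, e = Map.size(); i != e; ++i)
        if (Map[i] && Row[i] && Row[i] != Map[i])
          return false;                          // Conflicting entry.
      for (unsigned i = 0, e = Map.size(); i != e; ++i)
        if (Map[i])
          Row[i] = Map[i];                       // Fill in wildcards.
      return true;
    }

    int main() {
      // Three compose maps: the first two are compatible and share a row.
      unsigned a[] = {2, 0, 0}, b[] = {0, 3, 0}, c[] = {1, 0, 0};
      std::vector<std::vector<unsigned> > Maps;
      Maps.push_back(std::vector<unsigned>(a, a + 3));
      Maps.push_back(std::vector<unsigned>(b, b + 3));
      Maps.push_back(std::vector<unsigned>(c, c + 3));

      std::vector<std::vector<unsigned> > Rows;
      std::vector<unsigned> RowMap;
      for (unsigned i = 0; i != Maps.size(); ++i) {
        unsigned Found = ~0u;
        for (unsigned r = 0; r != Rows.size(); ++r)
          if (combineRow(Maps[i], Rows[r])) { Found = r; break; }
        if (Found == ~0u) {
          Rows.push_back(std::vector<unsigned>(Maps[i].size(), 0));
          combineRow(Maps[i], Rows.back());
          Found = Rows.size() - 1;
        }
        RowMap.push_back(Found);
      }
      // Prints "2 rows for 3 indices"; lookups go through Rows[RowMap[Idx]].
      std::printf("%u rows for %u indices\n",
                  unsigned(Rows.size()), unsigned(Maps.size()));
      return 0;
    }
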
Modified: llvm/branches/R600/utils/TableGen/SetTheory.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/SetTheory.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/SetTheory.cpp (original)
+++ llvm/branches/R600/utils/TableGen/SetTheory.cpp Tue Nov 13 09:21:47 2012
@@ -27,20 +27,20 @@
 
 // (add a, b, ...) Evaluate and union all arguments.
 struct AddOp : public SetTheory::Operator {
-  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
-    ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts);
+  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
+    ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc);
   }
 };
 
 // (sub Add, Sub, ...) Set difference.
 struct SubOp : public SetTheory::Operator {
-  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (Expr->arg_size() < 2)
-      throw "Set difference needs at least two arguments: " +
-        Expr->getAsString();
+      PrintFatalError(Loc, "Set difference needs at least two arguments: " +
+        Expr->getAsString());
     RecSet Add, Sub;
-    ST.evaluate(*Expr->arg_begin(), Add);
-    ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub);
+    ST.evaluate(*Expr->arg_begin(), Add, Loc);
+    ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub, Loc);
     for (RecSet::iterator I = Add.begin(), E = Add.end(); I != E; ++I)
       if (!Sub.count(*I))
         Elts.insert(*I);
@@ -49,12 +49,13 @@
 
 // (and S1, S2) Set intersection.
 struct AndOp : public SetTheory::Operator {
-  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (Expr->arg_size() != 2)
-      throw "Set intersection requires two arguments: " + Expr->getAsString();
+      PrintFatalError(Loc, "Set intersection requires two arguments: " +
+        Expr->getAsString());
     RecSet S1, S2;
-    ST.evaluate(Expr->arg_begin()[0], S1);
-    ST.evaluate(Expr->arg_begin()[1], S2);
+    ST.evaluate(Expr->arg_begin()[0], S1, Loc);
+    ST.evaluate(Expr->arg_begin()[1], S2, Loc);
     for (RecSet::iterator I = S1.begin(), E = S1.end(); I != E; ++I)
       if (S2.count(*I))
         Elts.insert(*I);
@@ -65,17 +66,19 @@
 struct SetIntBinOp : public SetTheory::Operator {
   virtual void apply2(SetTheory &ST, DagInit *Expr,
                      RecSet &Set, int64_t N,
-                     RecSet &Elts) =0;
+                     RecSet &Elts, ArrayRef<SMLoc> Loc) =0;
 
-  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (Expr->arg_size() != 2)
-      throw "Operator requires (Op Set, Int) arguments: " + Expr->getAsString();
+      PrintFatalError(Loc, "Operator requires (Op Set, Int) arguments: " +
+        Expr->getAsString());
     RecSet Set;
-    ST.evaluate(Expr->arg_begin()[0], Set);
+    ST.evaluate(Expr->arg_begin()[0], Set, Loc);
     IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]);
     if (!II)
-      throw "Second argument must be an integer: " + Expr->getAsString();
-    apply2(ST, Expr, Set, II->getValue(), Elts);
+      PrintFatalError(Loc, "Second argument must be an integer: " +
+        Expr->getAsString());
+    apply2(ST, Expr, Set, II->getValue(), Elts, Loc);
   }
 };
 
@@ -83,9 +86,10 @@
 struct ShlOp : public SetIntBinOp {
   void apply2(SetTheory &ST, DagInit *Expr,
              RecSet &Set, int64_t N,
-             RecSet &Elts) {
+             RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (N < 0)
-      throw "Positive shift required: " + Expr->getAsString();
+      PrintFatalError(Loc, "Positive shift required: " +
+        Expr->getAsString());
     if (unsigned(N) < Set.size())
       Elts.insert(Set.begin() + N, Set.end());
   }
@@ -95,9 +99,10 @@
 struct TruncOp : public SetIntBinOp {
   void apply2(SetTheory &ST, DagInit *Expr,
              RecSet &Set, int64_t N,
-             RecSet &Elts) {
+             RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (N < 0)
-      throw "Positive length required: " + Expr->getAsString();
+      PrintFatalError(Loc, "Positive length required: " +
+        Expr->getAsString());
     if (unsigned(N) > Set.size())
       N = Set.size();
     Elts.insert(Set.begin(), Set.begin() + N);
@@ -112,7 +117,7 @@
 
   void apply2(SetTheory &ST, DagInit *Expr,
              RecSet &Set, int64_t N,
-             RecSet &Elts) {
+             RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (Reverse)
       N = -N;
     // N > 0 -> rotate left, N < 0 -> rotate right.
@@ -131,9 +136,10 @@
 struct DecimateOp : public SetIntBinOp {
   void apply2(SetTheory &ST, DagInit *Expr,
              RecSet &Set, int64_t N,
-             RecSet &Elts) {
+             RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (N <= 0)
-      throw "Positive stride required: " + Expr->getAsString();
+      PrintFatalError(Loc, "Positive stride required: " +
+        Expr->getAsString());
     for (unsigned I = 0; I < Set.size(); I += N)
       Elts.insert(Set[I]);
   }
@@ -141,12 +147,12 @@
 
 // (interleave S1, S2, ...) Interleave elements of the arguments.
 struct InterleaveOp : public SetTheory::Operator {
-  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     // Evaluate the arguments individually.
     SmallVector<RecSet, 4> Args(Expr->getNumArgs());
     unsigned MaxSize = 0;
     for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) {
-      ST.evaluate(Expr->getArg(i), Args[i]);
+      ST.evaluate(Expr->getArg(i), Args[i], Loc);
       MaxSize = std::max(MaxSize, unsigned(Args[i].size()));
     }
     // Interleave arguments into Elts.
@@ -159,38 +165,39 @@
 
 // (sequence "Format", From, To) Generate a sequence of records by name.
 struct SequenceOp : public SetTheory::Operator {
-  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+  void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     int Step = 1;
     if (Expr->arg_size() > 4)
-      throw "Bad args to (sequence \"Format\", From, To): " +
-        Expr->getAsString();
+      PrintFatalError(Loc, "Bad args to (sequence \"Format\", From, To): " +
+        Expr->getAsString());
     else if (Expr->arg_size() == 4) {
       if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[3])) {
         Step = II->getValue();
       } else
-        throw "Stride must be an integer: " + Expr->getAsString();
+        PrintFatalError(Loc, "Stride must be an integer: " +
+          Expr->getAsString());
     }
 
     std::string Format;
     if (StringInit *SI = dyn_cast<StringInit>(Expr->arg_begin()[0]))
       Format = SI->getValue();
     else
-      throw "Format must be a string: " + Expr->getAsString();
+      PrintFatalError(Loc,  "Format must be a string: " + Expr->getAsString());
 
     int64_t From, To;
     if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]))
       From = II->getValue();
     else
-      throw "From must be an integer: " + Expr->getAsString();
+      PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString());
     if (From < 0 || From >= (1 << 30))
-      throw "From out of range";
+      PrintFatalError(Loc, "From out of range");
 
     if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2]))
       To = II->getValue();
     else
-      throw "From must be an integer: " + Expr->getAsString();
+      PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString());
     if (To < 0 || To >= (1 << 30))
-      throw "To out of range";
+      PrintFatalError(Loc, "To out of range");
 
     RecordKeeper &Records =
       cast<DefInit>(Expr->getOperator())->getDef()->getRecords();
@@ -206,7 +213,8 @@
       OS << format(Format.c_str(), unsigned(From));
       Record *Rec = Records.getDef(OS.str());
       if (!Rec)
-        throw "No def named '" + Name + "': " + Expr->getAsString();
+        PrintFatalError(Loc, "No def named '" + Name + "': " +
+          Expr->getAsString());
       // Try to reevaluate Rec in case it is a set.
       if (const RecVec *Result = ST.expand(Rec))
         Elts.insert(Result->begin(), Result->end());
@@ -225,7 +233,7 @@
   FieldExpander(StringRef fn) : FieldName(fn) {}
 
   void expand(SetTheory &ST, Record *Def, RecSet &Elts) {
-    ST.evaluate(Def->getValueInit(FieldName), Elts);
+    ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc());
   }
 };
 } // end anonymous namespace
@@ -259,7 +267,7 @@
   addExpander(ClassName, new FieldExpander(FieldName));
 }
 
-void SetTheory::evaluate(Init *Expr, RecSet &Elts) {
+void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
   // A def in a list can be a just an element, or it may expand.
   if (DefInit *Def = dyn_cast<DefInit>(Expr)) {
     if (const RecVec *Result = expand(Def->getDef()))
@@ -270,19 +278,19 @@
 
   // Lists simply expand.
   if (ListInit *LI = dyn_cast<ListInit>(Expr))
-    return evaluate(LI->begin(), LI->end(), Elts);
+    return evaluate(LI->begin(), LI->end(), Elts, Loc);
 
   // Anything else must be a DAG.
   DagInit *DagExpr = dyn_cast<DagInit>(Expr);
   if (!DagExpr)
-    throw "Invalid set element: " + Expr->getAsString();
+    PrintFatalError(Loc, "Invalid set element: " + Expr->getAsString());
   DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator());
   if (!OpInit)
-    throw "Bad set expression: " + Expr->getAsString();
+    PrintFatalError(Loc, "Bad set expression: " + Expr->getAsString());
   Operator *Op = Operators.lookup(OpInit->getDef()->getName());
   if (!Op)
-    throw "Unknown set operator: " + Expr->getAsString();
-  Op->apply(*this, DagExpr, Elts);
+    PrintFatalError(Loc, "Unknown set operator: " + Expr->getAsString());
+  Op->apply(*this, DagExpr, Elts, Loc);
 }
 
 const RecVec *SetTheory::expand(Record *Set) {
@@ -292,23 +300,19 @@
     return &I->second;
 
   // This is the first time we see Set. Find a suitable expander.
-  try {
-    const std::vector<Record*> &SC = Set->getSuperClasses();
-    for (unsigned i = 0, e = SC.size(); i != e; ++i) {
-      // Skip unnamed superclasses.
-      if (!dyn_cast<StringInit>(SC[i]->getNameInit()))
-        continue;
-      if (Expander *Exp = Expanders.lookup(SC[i]->getName())) {
-        // This breaks recursive definitions.
-        RecVec &EltVec = Expansions[Set];
-        RecSet Elts;
-        Exp->expand(*this, Set, Elts);
-        EltVec.assign(Elts.begin(), Elts.end());
-        return &EltVec;
-      }
+  const std::vector<Record*> &SC = Set->getSuperClasses();
+  for (unsigned i = 0, e = SC.size(); i != e; ++i) {
+    // Skip unnamed superclasses.
+    if (!dyn_cast<StringInit>(SC[i]->getNameInit()))
+      continue;
+    if (Expander *Exp = Expanders.lookup(SC[i]->getName())) {
+      // This breaks recursive definitions.
+      RecVec &EltVec = Expansions[Set];
+      RecSet Elts;
+      Exp->expand(*this, Set, Elts);
+      EltVec.assign(Elts.begin(), Elts.end());
+      return &EltVec;
     }
-  } catch (const std::string &Error) {
-    throw TGError(Set->getLoc(), Error);
   }
 
   // Set is not expandable.

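SetTheory.cpp and SetTheory.h (below) thread an ArrayRef<SMLoc> through evaluate() and Operator::apply() so that malformed set expressions are reported against the record that contained them rather than thrown as strings. A sketch of a custom operator under the new signature; the (first ...) operator is invented for illustration:

    // Sketch of a custom SetTheory operator using the new apply() signature.
    #include "SetTheory.h"
    #include "llvm/TableGen/Error.h"
    #include "llvm/TableGen/Record.h"
    using namespace llvm;

    // (first S) - expand to the first element of S, if any.
    struct FirstOp : public SetTheory::Operator {
      void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                 ArrayRef<SMLoc> Loc) {
        if (Expr->arg_size() != 1)
          PrintFatalError(Loc, "Expected one argument: " + Expr->getAsString());
        SetTheory::RecSet Set;
        ST.evaluate(Expr->arg_begin()[0], Set, Loc);   // Loc threads through.
        if (!Set.empty())
          Elts.insert(Set[0]);
      }
    };

    // Registration, e.g. in a backend's constructor:
    //   Sets.addOperator("first", new FirstOp());
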
Modified: llvm/branches/R600/utils/TableGen/SetTheory.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/SetTheory.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/SetTheory.h (original)
+++ llvm/branches/R600/utils/TableGen/SetTheory.h Tue Nov 13 09:21:47 2012
@@ -49,6 +49,7 @@
 
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/Support/SourceMgr.h"
 #include <map>
 #include <vector>
 
@@ -72,7 +73,8 @@
 
     /// apply - Apply this operator to Expr's arguments and insert the result
     /// in Elts.
-    virtual void apply(SetTheory&, DagInit *Expr, RecSet &Elts) =0;
+    virtual void apply(SetTheory&, DagInit *Expr, RecSet &Elts,
+                       ArrayRef<SMLoc> Loc) =0;
   };
 
   /// Expander - A callback function that can transform a Record representing a
@@ -119,13 +121,13 @@
   void addOperator(StringRef Name, Operator*);
 
   /// evaluate - Evaluate Expr and append the resulting set to Elts.
-  void evaluate(Init *Expr, RecSet &Elts);
+  void evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc);
 
   /// evaluate - Evaluate a sequence of Inits and append to Elts.
   template<typename Iter>
-  void evaluate(Iter begin, Iter end, RecSet &Elts) {
+  void evaluate(Iter begin, Iter end, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     while (begin != end)
-      evaluate(*begin++, Elts);
+      evaluate(*begin++, Elts, Loc);
   }
 
   /// expand - Expand a record into a set of elements if possible.  Return a

Modified: llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp Tue Nov 13 09:21:47 2012
@@ -675,7 +675,7 @@
         continue;
     }
     if (AliasDef)
-      throw TGError(AliasRW.TheDef->getLoc(), "Multiple aliases "
+      PrintFatalError(AliasRW.TheDef->getLoc(), "Multiple aliases "
                     "defined for processor " + ProcModel.ModelName +
                     " Ensure only one SchedAlias exists per RW.");
     AliasDef = AliasRW.TheDef;
@@ -692,7 +692,7 @@
     if (AliasDef == (*WRI)->getValueAsDef("WriteType")
         || SchedWrite.TheDef == (*WRI)->getValueAsDef("WriteType")) {
       if (ResDef) {
-        throw TGError((*WRI)->getLoc(), "Resources are defined for both "
+        PrintFatalError((*WRI)->getLoc(), "Resources are defined for both "
                       "SchedWrite and its alias on processor " +
                       ProcModel.ModelName);
       }
@@ -702,7 +702,7 @@
   // TODO: If ProcModel has a base model (previous generation processor),
   // then call FindWriteResources recursively with that model here.
   if (!ResDef) {
-    throw TGError(ProcModel.ModelDef->getLoc(),
+    PrintFatalError(ProcModel.ModelDef->getLoc(),
                   std::string("Processor does not define resources for ")
                   + SchedWrite.TheDef->getName());
   }
@@ -729,7 +729,7 @@
         continue;
     }
     if (AliasDef)
-      throw TGError(AliasRW.TheDef->getLoc(), "Multiple aliases "
+      PrintFatalError(AliasRW.TheDef->getLoc(), "Multiple aliases "
                     "defined for processor " + ProcModel.ModelName +
                     " Ensure only one SchedAlias exists per RW.");
     AliasDef = AliasRW.TheDef;
@@ -746,7 +746,7 @@
     if (AliasDef == (*RAI)->getValueAsDef("ReadType")
         || SchedRead.TheDef == (*RAI)->getValueAsDef("ReadType")) {
       if (ResDef) {
-        throw TGError((*RAI)->getLoc(), "Resources are defined for both "
+        PrintFatalError((*RAI)->getLoc(), "Resources are defined for both "
                       "SchedRead and its alias on processor " +
                       ProcModel.ModelName);
       }
@@ -756,7 +756,7 @@
   // TODO: If ProcModel has a base model (previous generation processor),
   // then call FindReadAdvance recursively with that model here.
   if (!ResDef && SchedRead.TheDef->getName() != "ReadDefault") {
-    throw TGError(ProcModel.ModelDef->getLoc(),
+    PrintFatalError(ProcModel.ModelDef->getLoc(),
                   std::string("Processor does not define resources for ")
                   + SchedRead.TheDef->getName());
   }
@@ -1053,7 +1053,7 @@
       continue;
 
     std::vector<MCSchedClassDesc> &SCTab =
-      SchedTables.ProcSchedClasses[1 + PI - SchedModels.procModelBegin()];
+      SchedTables.ProcSchedClasses[1 + (PI - SchedModels.procModelBegin())];
 
     OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup,"
        << " WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}\n";
@@ -1098,7 +1098,7 @@
     if (PI->hasInstrSchedModel())
       EmitProcessorResources(*PI, OS);
     else if(!PI->ProcResourceDefs.empty())
-      throw TGError(PI->ModelDef->getLoc(), "SchedMachineModel defines "
+      PrintFatalError(PI->ModelDef->getLoc(), "SchedMachineModel defines "
                     "ProcResources without defining WriteRes SchedWriteRes");
 
     // Begin processor itinerary properties

Modified: llvm/branches/R600/utils/TableGen/TableGenBackends.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/TableGenBackends.h?rev=167838&r1=167837&r2=167838&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/TableGenBackends.h (original)
+++ llvm/branches/R600/utils/TableGen/TableGenBackends.h Tue Nov 13 09:21:47 2012
@@ -74,5 +74,6 @@
 void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS);
 void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
 void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
+void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
 
 } // End llvm namespace


More information about the llvm-branch-commits mailing list