[llvm-commits] [llvm] r65220 - in /llvm/branches/Apple/Dib: include/llvm/Transforms/Utils/AddrModeMatcher.h lib/Transforms/Scalar/CodeGenPrepare.cpp lib/Transforms/Scalar/LoopStrengthReduce.cpp lib/Transforms/Utils/AddrModeMatcher.cpp test/CodeGen/X86/2007-10-05-3AddrConvert.ll test/CodeGen/X86/loop-strength-reduce-2.ll test/CodeGen/X86/loop-strength-reduce7.ll test/CodeGen/X86/loop-strength-reduce8.ll test/CodeGen/X86/stride-nine-with-base-reg.ll
Bill Wendling
isanbard at gmail.com
Fri Feb 20 19:35:52 PST 2009
Author: void
Date: Fri Feb 20 21:35:52 2009
New Revision: 65220
URL: http://llvm.org/viewvc/llvm-project?rev=65220&view=rev
Log:
Pulling r65134 into Dib
A include/llvm/Transforms/Utils/AddrModeMatcher.h
A lib/Transforms/Utils/AddrModeMatcher.cpp
C lib/Transforms/Scalar/CodeGenPrepare.cpp
Pulling r65178 into Dib
A test/CodeGen/X86/loop-strength-reduce7.ll
U lib/Transforms/Scalar/LoopStrengthReduce.cpp
Pulling r65215 into Dib
U test/CodeGen/X86/loop-strength-reduce-2.ll
A test/CodeGen/X86/loop-strength-reduce8.ll
U test/CodeGen/X86/2007-10-05-3AddrConvert.ll
U test/CodeGen/X86/stride-nine-with-base-reg.ll
G lib/Transforms/Scalar/LoopStrengthReduce.cpp
Added:
llvm/branches/Apple/Dib/include/llvm/Transforms/Utils/AddrModeMatcher.h
- copied unchanged from r65134, llvm/trunk/include/llvm/Transforms/Utils/AddrModeMatcher.h
llvm/branches/Apple/Dib/lib/Transforms/Utils/AddrModeMatcher.cpp
- copied unchanged from r65134, llvm/trunk/lib/Transforms/Utils/AddrModeMatcher.cpp
llvm/branches/Apple/Dib/test/CodeGen/X86/loop-strength-reduce7.ll
- copied unchanged from r65178, llvm/trunk/test/CodeGen/X86/loop-strength-reduce7.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/loop-strength-reduce8.ll
- copied unchanged from r65215, llvm/trunk/test/CodeGen/X86/loop-strength-reduce8.ll
Modified:
llvm/branches/Apple/Dib/lib/Transforms/Scalar/CodeGenPrepare.cpp
llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/branches/Apple/Dib/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/loop-strength-reduce-2.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/stride-nine-with-base-reg.ll
Modified: llvm/branches/Apple/Dib/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=65220&r1=65219&r2=65220&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Transforms/Scalar/CodeGenPrepare.cpp Fri Feb 20 21:35:52 2009
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
@@ -551,640 +552,6 @@
// Addressing Mode Analysis and Optimization
//===----------------------------------------------------------------------===//
-namespace {
- /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
- /// which holds actual Value*'s for register values.
- struct ExtAddrMode : public TargetLowering::AddrMode {
- Value *BaseReg;
- Value *ScaledReg;
- ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
- void print(OStream &OS) const;
- void dump() const {
- print(cerr);
- cerr << '\n';
- }
- };
-} // end anonymous namespace
-
-static inline OStream &operator<<(OStream &OS, const ExtAddrMode &AM) {
- AM.print(OS);
- return OS;
-}
-
-void ExtAddrMode::print(OStream &OS) const {
- bool NeedPlus = false;
- OS << "[";
- if (BaseGV)
- OS << (NeedPlus ? " + " : "")
- << "GV:%" << BaseGV->getName(), NeedPlus = true;
-
- if (BaseOffs)
- OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
-
- if (BaseReg)
- OS << (NeedPlus ? " + " : "")
- << "Base:%" << BaseReg->getName(), NeedPlus = true;
- if (Scale)
- OS << (NeedPlus ? " + " : "")
- << Scale << "*%" << ScaledReg->getName(), NeedPlus = true;
-
- OS << ']';
-}
-
-namespace {
-/// AddressingModeMatcher - This class exposes a single public method, which is
-/// used to construct a "maximal munch" of the addressing mode for the target
-/// specified by TLI for an access to "V" with an access type of AccessTy. This
-/// returns the addressing mode that is actually matched by value, but also
-/// returns the list of instructions involved in that addressing computation in
-/// AddrModeInsts.
-class AddressingModeMatcher {
- SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetLowering &TLI;
-
- /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
- /// the memory instruction that we're computing this address for.
- const Type *AccessTy;
- Instruction *MemoryInst;
-
- /// AddrMode - This is the addressing mode that we're building up. This is
- /// part of the return value of this addressing mode matching stuff.
- ExtAddrMode &AddrMode;
-
- /// IgnoreProfitability - This is set to true when we should not do
- /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
- /// always returns true.
- bool IgnoreProfitability;
-
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, const Type *AT,
- Instruction *MI, ExtAddrMode &AM)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
- IgnoreProfitability = false;
- }
-public:
-
- /// Match - Find the maximal addressing mode that a load/store of V can fold,
- /// given an access type of AccessTy. This returns a list of involved
- /// instructions in AddrModeInsts.
- static ExtAddrMode Match(Value *V, const Type *AccessTy,
- Instruction *MemoryInst,
- SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI) {
- ExtAddrMode Result;
-
- bool Success =
- AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
- MemoryInst, Result).MatchAddr(V, 0);
- Success = Success; assert(Success && "Couldn't select *anything*?");
- return Result;
- }
-private:
- bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool MatchAddr(Value *V, unsigned Depth);
- bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
- bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
- ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter);
- bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
-};
-} // end anonymous namespace
-
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
-/// Return true and update AddrMode if this addr mode is legal for the target,
-/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
- unsigned Depth) {
- // If Scale is 1, then this is the same as adding ScaleReg to the addressing
- // mode. Just process that directly.
- if (Scale == 1)
- return MatchAddr(ScaleReg, Depth);
-
- // If the scale is 0, it takes nothing to add this.
- if (Scale == 0)
- return true;
-
- // If we already have a scale of this value, we can add to it, otherwise, we
- // need an available scale field.
- if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
- return false;
-
- ExtAddrMode TestAddrMode = AddrMode;
-
- // Add scale to turn X*4+X*3 -> X*7. This could also do things like
- // [A+B + A*7] -> [B+A*8].
- TestAddrMode.Scale += Scale;
- TestAddrMode.ScaledReg = ScaleReg;
-
- // If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
- return false;
-
- // It was legal, so commit it.
- AddrMode = TestAddrMode;
-
- // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
- // to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
- ConstantInt *CI; Value *AddLHS;
- if (isa<Instruction>(ScaleReg) && // not a constant expr.
- match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
- TestAddrMode.ScaledReg = AddLHS;
- TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-
- // If this addressing mode is legal, commit it and remember that we folded
- // this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
- AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
- AddrMode = TestAddrMode;
- return true;
- }
- }
-
- // Otherwise, not (x+c)*scale, just return what we have.
- return true;
-}
-
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it. This doesn't need to be perfect, but needs to accept at least
-/// the set of instructions that MatchOperationAddr can.
-static bool MightBeFoldableInst(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::BitCast:
- // Don't touch identity bitcasts.
- if (I->getType() == I->getOperand(0)->getType())
- return false;
- return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return true;
- case Instruction::IntToPtr:
- // We know the input is intptr_t, so this is foldable.
- return true;
- case Instruction::Add:
- return true;
- case Instruction::Mul:
- case Instruction::Shl:
- // Can only handle X*C and X << C.
- return isa<ConstantInt>(I->getOperand(1));
- case Instruction::GetElementPtr:
- return true;
- default:
- return false;
- }
-}
-
-
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode. If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
- unsigned Depth) {
- // Avoid exponential behavior on extremely deep expression trees.
- if (Depth >= 5) return false;
-
- switch (Opcode) {
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return MatchAddr(AddrInst->getOperand(0), Depth);
- case Instruction::IntToPtr:
- // This inttoptr is a no-op if the integer type is pointer sized.
- if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy())
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::BitCast:
- // BitCast is always a noop, and we can handle it as long as it is
- // int->int or pointer->pointer (we don't want int<->fp or something).
- if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) ||
- isa<IntegerType>(AddrInst->getOperand(0)->getType())) &&
- // Don't touch identity bitcasts. These were probably put here by LSR,
- // and we don't want to mess around with them. Assume it knows what it
- // is doing.
- AddrInst->getOperand(0)->getType() != AddrInst->getType())
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
- if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
- MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
-
- // Restore the old addr mode info.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
-
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
- MatchAddr(AddrInst->getOperand(1), Depth+1))
- return true;
-
- // Otherwise we definitely can't merge the ADD in.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- break;
- }
- //case Instruction::Or:
- // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
- //break;
- case Instruction::Mul:
- case Instruction::Shl: {
- // Can only handle X*C and X << C.
- ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) return false;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1 << Scale;
-
- return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
- }
- case Instruction::GetElementPtr: {
- // Scan the GEP. We check whether it contains constant offsets and at most
- // one variable offset.
- int VariableOperand = -1;
- unsigned VariableScale = 0;
-
- int64_t ConstantOffset = 0;
- const TargetData *TD = TLI.getTargetData();
- gep_type_iterator GTI = gep_type_begin(AddrInst);
- for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx =
- cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
- ConstantOffset += SL->getElementOffset(Idx);
- } else {
- uint64_t TypeSize = TD->getTypePaddedSize(GTI.getIndexedType());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue()*TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
- // We only allow one variable index at the moment.
- if (VariableOperand != -1)
- return false;
-
- // Remember the variable index.
- VariableOperand = i;
- VariableScale = TypeSize;
- }
- }
- }
-
- // A common case is for the GEP to only do a constant offset. In this case,
- // just add it to the disp field and check validity.
- if (VariableOperand == -1) {
- AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
- // Check to see if we can fold the base pointer in too.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
- }
- AddrMode.BaseOffs -= ConstantOffset;
- return false;
- }
-
- // Save the valid addressing mode in case we can't match.
- ExtAddrMode BackupAddrMode = AddrMode;
-
- // Check that this has no base reg yet. If it does, we won't have a place to
- // put the base of the GEP (assuming it is not a null ptr).
- bool SetBaseReg = true;
- if (isa<ConstantPointerNull>(AddrInst->getOperand(0)))
- SetBaseReg = false; // null pointer base doesn't need representation.
- else if (AddrMode.HasBaseReg)
- return false; // Base register already specified, can't match GEP.
- else {
- // Otherwise, we'll use the GEP base as the BaseReg.
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- }
-
- // See if the scale and offset amount is valid for this target.
- AddrMode.BaseOffs += ConstantOffset;
-
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
- Depth)) {
- AddrMode = BackupAddrMode;
- return false;
- }
-
- // If we have a null as the base of the GEP, folding in the constant offset
- // plus variable scale is all we can do.
- if (!SetBaseReg) return true;
-
- // If this match succeeded, we know that we can form an address with the
- // GepBase as the basereg. Match the base pointer of the GEP more
- // aggressively by zeroing out BaseReg and rematching. If the base is
- // (for example) another GEP, this allows merging in that other GEP into
- // the addressing mode we're forming.
- AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
- bool Success = MatchAddr(AddrInst->getOperand(0), Depth+1);
- assert(Success && "MatchAddr should be able to fill in BaseReg!");
- Success=Success;
- return true;
- }
- }
- return false;
-}
-
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode. If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
-///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
- // Fold in immediates if legal for the target.
- AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseOffs -= CI->getSExtValue();
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
- // If this is a global variable, try to fold it into the addressing mode.
- if (AddrMode.BaseGV == 0) {
- AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseGV = 0;
- }
- } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
-
- // Check to see if it is possible to fold this operation.
- if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
- // Okay, it's possible to fold this. Check to see if it is actually
- // *profitable* to do so. We use a simple cost model to avoid increasing
- // register pressure too much.
- if (I->hasOneUse() ||
- IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
- AddrModeInsts.push_back(I);
- return true;
- }
-
- // It isn't profitable to do this, roll back.
- //cerr << "NOT FOLDING: " << *I;
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- }
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
- return true;
- } else if (isa<ConstantPointerNull>(Addr)) {
- // Null pointer gets folded without affecting the addressing mode.
- return true;
- }
-
- // Worst case, the target should support [reg] addressing modes. :)
- if (!AddrMode.HasBaseReg) {
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = Addr;
- // Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
- }
-
- // If the base register is already taken, see if we can do [r+r].
- if (AddrMode.Scale == 0) {
- AddrMode.Scale = 1;
- AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.Scale = 0;
- AddrMode.ScaledReg = 0;
- }
- // Couldn't match.
- return false;
-}
-
-
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands. If so, return true, otherwise
-/// return false.
-static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- std::vector<InlineAsm::ConstraintInfo>
- Constraints = IA->ParseConstraints();
-
- unsigned ArgNo = 1; // ArgNo - The operand of the CallInst.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
- // Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
- }
-
- // Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue(),
- OpInfo.ConstraintType == TargetLowering::C_Memory);
-
- // If this asm operand is our Value*, and if it isn't an indirect memory
- // operand, we can't fold it!
- if (OpInfo.CallOperandVal == OpVal &&
- (OpInfo.ConstraintType != TargetLowering::C_Memory ||
- !OpInfo.isIndirect))
- return false;
- }
-
- return true;
-}
-
-
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use. If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSet<Instruction*, 16> &ConsideredInsts,
- const TargetLowering &TLI) {
- // If we already considered this instruction, we're done.
- if (!ConsideredInsts.insert(I))
- return false;
-
- // If this is an obviously unfoldable instruction, bail out.
- if (!MightBeFoldableInst(I))
- return true;
-
- // Loop over all the uses, recursively processing them.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
- MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
- if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
- continue;
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
- InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
- if (IA == 0) return true;
-
- // If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
- return true;
- continue;
- }
-
- if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts,
- TLI))
- return true;
- }
-
- return false;
-}
-
-
-/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
-/// the use site that we're folding it into. If so, there is no cost to
-/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
-/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
- Value *KnownLive2) {
- // If Val is either of the known-live values, we know it is live!
- if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
- return true;
-
- // All values other than instructions and arguments (e.g. constants) are live.
- if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-
- // If Val is a constant-sized alloca in the entry block, it is live; this is
- // true because it is just a reference to the stack/frame pointer, which is
- // live for the whole function.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
- if (AI->isStaticAlloca())
- return true;
-
- // Check to see if this value is already used in the memory instruction's
- // block. If so, it's already live into the block at the very least, so we
- // can reasonably fold it.
- BasicBlock *MemBB = MemoryInst->getParent();
- for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
- UI != E; ++UI)
- // We know that uses of arguments and instructions have to be instructions.
- if (cast<Instruction>(*UI)->getParent() == MemBB)
- return true;
-
- return false;
-}
-
-
-
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it. However, the specified instruction has multiple
-/// uses. Given this, it may actually increase register pressure to fold it
-/// into the load. For example, consider this code:
-///
-/// X = ...
-/// Y = X+1
-/// use(Y) -> nonload/store
-/// Z = Y+1
-/// load Z
-///
-/// In this case, Y has multiple uses, and can be folded into the load of Z
-/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
-/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
-/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
-/// number of computations either.
-///
-/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
-/// X was live across 'load Z' for other reasons, we actually *would* want to
-/// fold the addressing mode in the Z case. This would make Y die earlier.
-bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter) {
- if (IgnoreProfitability) return true;
-
- // AMBefore is the addressing mode before this instruction was folded into it,
- // and AMAfter is the addressing mode after the instruction was folded. Get
- // the set of registers referenced by AMAfter and subtract out those
- // referenced by AMBefore: this is the set of values which folding in this
- // address extends the lifetime of.
- //
- // Note that there are only two potential values being referenced here,
- // BaseReg and ScaleReg (global addresses are always available, as are any
- // folded immediates).
- Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-
- // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
- // lifetime wasn't extended by adding this instruction.
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- BaseReg = 0;
- if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- ScaledReg = 0;
-
- // If folding this instruction (and its subexprs) didn't extend any live
- // ranges, we're ok with it.
- if (BaseReg == 0 && ScaledReg == 0)
- return true;
-
- // If all uses of this instruction are ultimately load/store/inlineasm's,
- // check to see if their addressing modes will include this instruction. If
- // so, we can fold it into all uses, so it doesn't matter if it has multiple
- // uses.
- SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
- SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
- return false; // Has a non-memory, non-foldable use!
-
- // Now that we know that all uses of this instruction are part of a chain of
- // computation involving only operations that could theoretically be folded
- // into a memory use, loop over each of these uses and see if they could
- // *actually* fold the instruction.
- SmallVector<Instruction*, 32> MatchedAddrModeInsts;
- for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
- Instruction *User = MemoryUses[i].first;
- unsigned OpNo = MemoryUses[i].second;
-
- // Get the access type of this use. If the use isn't a pointer, we don't
- // know what it accesses.
- Value *Address = User->getOperand(OpNo);
- if (!isa<PointerType>(Address->getType()))
- return false;
- const Type *AddressAccessTy =
- cast<PointerType>(Address->getType())->getElementType();
-
- // Do a match against the root of this address, ignoring profitability. This
- // will tell us if the addressing mode for the memory operation will
- // *actually* cover the shared instruction.
- ExtAddrMode Result;
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
- MemoryInst, Result);
- Matcher.IgnoreProfitability = true;
- bool Success = Matcher.MatchAddr(Address, 0);
- Success = Success; assert(Success && "Couldn't select *anything*?");
-
- // If the match didn't cover I, then it won't be shared by it.
- if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
- I) == MatchedAddrModeInsts.end())
- return false;
-
- MatchedAddrModeInsts.clear();
- }
-
- return true;
-}
-
-
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
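For reference, the moved matcher is driven entirely through its single public entry point, AddressingModeMatcher::Match, declared in the new AddrModeMatcher.h. A minimal usage sketch follows; it is not part of the patch, the wrapper name is invented, and it assumes Addr, AccessTy, MemoryInst, and TLI have the same shape they do at the call sites in this patch.

  #include "llvm/Transforms/Utils/AddrModeMatcher.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  // Hypothetical sketch: fold the address computation feeding a memory
  // instruction into one target addressing mode. Addr is the pointer
  // operand of MemoryInst, AccessTy is the type being accessed, and TLI
  // is the target's lowering info, as at the call sites above.
  static ExtAddrMode matchAddrSketch(Value *Addr, const Type *AccessTy,
                                     Instruction *MemoryInst,
                                     const TargetLowering &TLI) {
    SmallVector<Instruction*, 16> AddrModeInsts;
    ExtAddrMode AM =
      AddressingModeMatcher::Match(Addr, AccessTy, MemoryInst,
                                   AddrModeInsts, TLI);
    // On return, AM describes [BaseGV + BaseOffs + Base:BaseReg +
    // Scale*ScaledReg] (see ExtAddrMode::print above), and AddrModeInsts
    // holds the address-computation instructions subsumed by that mode.
    return AM;
  }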
Modified: llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=65220&r1=65219&r2=65220&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp Fri Feb 20 21:35:52 2009
@@ -26,18 +26,18 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
-#include <set>
using namespace llvm;
STATISTIC(NumReduced , "Number of GEPs strength reduced");
@@ -45,6 +45,7 @@
STATISTIC(NumVariable, "Number of PHIs with variable strides");
STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
+STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
namespace {
@@ -920,21 +921,17 @@
/// that can fit into the immediate field of instructions in the target.
/// Accumulate these immediate values into the Imm value.
static void MoveImmediateValues(const TargetLowering *TLI,
- Instruction *User,
+ const Type *UseTy,
SCEVHandle &Val, SCEVHandle &Imm,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
- const Type *UseTy = User->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(User))
- UseTy = SI->getOperand(0)->getType();
-
if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
std::vector<SCEVHandle> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
SCEVHandle NewOp = SAE->getOperand(i);
- MoveImmediateValues(TLI, User, NewOp, Imm, isAddress, L, SE);
+ MoveImmediateValues(TLI, UseTy, NewOp, Imm, isAddress, L, SE);
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
@@ -953,7 +950,7 @@
} else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
// Try to pull immediates out of the start value of nested addrec's.
SCEVHandle Start = SARE->getStart();
- MoveImmediateValues(TLI, User, Start, Imm, isAddress, L, SE);
+ MoveImmediateValues(TLI, UseTy, Start, Imm, isAddress, L, SE);
if (Start != SARE->getStart()) {
std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
@@ -968,7 +965,7 @@
SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType());
SCEVHandle NewOp = SME->getOperand(1);
- MoveImmediateValues(TLI, User, NewOp, SubImm, isAddress, L, SE);
+ MoveImmediateValues(TLI, UseTy, NewOp, SubImm, isAddress, L, SE);
// If we extracted something out of the subexpressions, see if we can
// simplify this!
@@ -1000,6 +997,16 @@
// Otherwise, no immediates to move.
}
+static void MoveImmediateValues(const TargetLowering *TLI,
+ Instruction *User,
+ SCEVHandle &Val, SCEVHandle &Imm,
+ bool isAddress, Loop *L,
+ ScalarEvolution *SE) {
+ const Type *UseTy = User->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(User))
+ UseTy = SI->getOperand(0)->getType();
+ MoveImmediateValues(TLI, UseTy, Val, Imm, isAddress, L, SE);
+}
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
@@ -1405,6 +1412,7 @@
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
unsigned NumPHI = 0;
+ bool HasAddress = false;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
@@ -1415,6 +1423,8 @@
UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
+ // Not all uses are outside the loop.
+ AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
@@ -1426,8 +1436,8 @@
++NumPHI;
}
- // Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
+ if (isAddress)
+ HasAddress = true;
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
@@ -1438,15 +1448,41 @@
}
}
- // If one of the use if a PHI node and all other uses are addresses, still
+ // If one of the uses is a PHI node and all other uses are addresses, still
// allow iv reuse. Essentially we are trading one constant multiplication
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
+
+ // There are no in-loop address uses.
+ if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
+ AllUsesAreAddresses = false;
return CommonExprs;
}
+static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
+ const Type *ReplacedTy,
+ std::vector<BasedUser> &UsersToProcess,
+ const TargetLowering *TLI) {
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
+ if (UsersToProcess[i].isUseOfPostIncrementedValue)
+ continue;
+ ExtAddrMode AddrMode =
+ AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
+ ReplacedTy, UsersToProcess[i].Inst,
+ AddrModeInsts, *TLI);
+ if (GV && GV != AddrMode.BaseGV)
+ return false;
+ if (Offset && !AddrMode.BaseOffs)
+ // FIXME: How do we accurately check that its immediate offset is folded?
+ return false;
+ AddrModeInsts.clear();
+ }
+ return true;
+}
+
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride (we know it is if isOnlyStride is true).
@@ -1509,6 +1545,41 @@
const Type *ReplacedTy = CommonExprs->getType();
+ // If all uses are addresses, consider sinking the immediate part of the
+ // common expression back into uses if they can fit in the immediate fields.
+ if (HaveCommonExprs && AllUsesAreAddresses) {
+ SCEVHandle NewCommon = CommonExprs;
+ SCEVHandle Imm = SE->getIntegerSCEV(0, ReplacedTy);
+ MoveImmediateValues(TLI, ReplacedTy, NewCommon, Imm, true, L, SE);
+ if (!Imm->isZero()) {
+ bool DoSink = true;
+
+ // If the immediate part of the common expression is a GV, check if it's
+ // possible to fold it into the target addressing mode.
+ GlobalValue *GV = 0;
+ if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm)) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
+ if (CE->getOpcode() == Instruction::PtrToInt)
+ GV = dyn_cast<GlobalValue>(CE->getOperand(0));
+ }
+ int64_t Offset = 0;
+ if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
+ Offset = SC->getValue()->getSExtValue();
+ if (GV || Offset)
+ DoSink = IsImmFoldedIntoAddrMode(GV, Offset, ReplacedTy,
+ UsersToProcess, TLI);
+
+ if (DoSink) {
+ DOUT << " Sinking " << *Imm << " back down into uses\n";
+ for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
+ UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
+ CommonExprs = NewCommon;
+ HaveCommonExprs = !CommonExprs->isZero();
+ ++NumImmSunk;
+ }
+ }
+ }
+
// Now that we know what we need to do, insert the PHI node itself.
//
DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
@@ -2320,7 +2391,8 @@
bool HasOneStride = IVUsesByStride.size() == 1;
#ifndef NDEBUG
- DOUT << "\nLSR on ";
+ DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
+ << "\" ";
DEBUG(L->dump());
#endif
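For reference, the immediate-sinking added above is easiest to see on a small all-address loop. A minimal illustration with invented names follows; this exact code is not in the patch.

  // Hypothetical example. Before this change, LSR hoisted the whole
  // common expression, constant included, so the preheader materialized
  // &A[9] in its own register. With the sinking above, when every use is
  // an address and IsImmFoldedIntoAddrMode confirms the target can fold
  // the immediate, the +9 is pushed back into each use, where it becomes
  // the BaseOffs of the matched ExtAddrMode instead of costing a register.
  extern char A[1000];
  void zero_from_nine(int n) {
    for (int i = 0; i != n; ++i)
      A[i + 9] = 0;  // store folds into a single [A+i+9] addressing mode
  }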
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/2007-10-05-3AddrConvert.ll?rev=65220&r1=65219&r2=65220&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/2007-10-05-3AddrConvert.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/2007-10-05-3AddrConvert.ll Fri Feb 20 21:35:52 2009
@@ -4,29 +4,43 @@
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
%struct.node = type { i16, double, [3 x double], i32, i32 }
-define fastcc void @old_main() {
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
- %tmp44 = malloc %struct.anon ; <%struct.anon*> [#uses=2]
- store double 4.000000e+00, double* null, align 4
- br label %bb41
-
-bb41: ; preds = %uniform_testdata.exit, %entry
- %i.0110 = phi i32 [ 0, %entry ], [ %tmp48, %uniform_testdata.exit ] ; <i32> [#uses=2]
- %tmp48 = add i32 %i.0110, 1 ; <i32> [#uses=1]
- br i1 false, label %uniform_testdata.exit, label %bb33.preheader.i
-
-bb33.preheader.i: ; preds = %bb41
- ret void
-
-uniform_testdata.exit: ; preds = %bb41
- %tmp57 = getelementptr %struct.anon* %tmp44, i32 0, i32 3, i32 %i.0110 ; <%struct.bnode**> [#uses=1]
- store %struct.bnode* null, %struct.bnode** %tmp57, align 4
- br i1 false, label %bb154, label %bb41
-
-bb154: ; preds = %bb154, %uniform_testdata.exit
- br i1 false, label %bb166, label %bb154
-
-bb166: ; preds = %bb154
- %tmp169 = getelementptr %struct.anon* %tmp44, i32 0, i32 3, i32 0 ; <%struct.bnode**> [#uses=0]
- ret void
+ %0 = malloc %struct.anon ; <%struct.anon*> [#uses=2]
+ %1 = getelementptr %struct.anon* %0, i32 0, i32 2 ; <%struct.node**> [#uses=1]
+ br label %bb14.i
+
+bb14.i: ; preds = %bb14.i, %entry
+ %i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %2, %bb14.i ] ; <i32> [#uses=1]
+ %2 = add i32 %i8.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
+ %exitcond74.i = icmp eq i32 %2, 32 ; <i1> [#uses=1]
+ br i1 %exitcond74.i, label %bb32.i, label %bb14.i
+
+bb32.i: ; preds = %bb32.i, %bb14.i
+ %tmp.0.reg2mem.0.i = phi i32 [ %indvar.next63.i, %bb32.i ], [ 0, %bb14.i ] ; <i32> [#uses=1]
+ %indvar.next63.i = add i32 %tmp.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
+ %exitcond64.i = icmp eq i32 %indvar.next63.i, 64 ; <i1> [#uses=1]
+ br i1 %exitcond64.i, label %bb47.loopexit.i, label %bb32.i
+
+bb.i.i: ; preds = %bb47.loopexit.i
+ unreachable
+
+stepsystem.exit.i: ; preds = %bb47.loopexit.i
+ store %struct.node* null, %struct.node** %1, align 4
+ br label %bb.i6.i
+
+bb.i6.i: ; preds = %bb.i6.i, %stepsystem.exit.i
+ %tmp.0.i.i = add i32 0, -1 ; <i32> [#uses=1]
+ %3 = icmp slt i32 %tmp.0.i.i, 0 ; <i1> [#uses=1]
+ br i1 %3, label %bb107.i.i, label %bb.i6.i
+
+bb107.i.i: ; preds = %bb107.i.i, %bb.i6.i
+ %q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %4, %bb.i6.i ] ; <%struct.bnode**> [#uses=1]
+ %q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in ; <%struct.bnode*> [#uses=0]
+ br label %bb107.i.i
+
+bb47.loopexit.i: ; preds = %bb32.i
+ %4 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 0 ; <%struct.bnode**> [#uses=1]
+ %5 = icmp eq %struct.node* null, null ; <i1> [#uses=1]
+ br i1 %5, label %stepsystem.exit.i, label %bb.i.i
}
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/loop-strength-reduce-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/loop-strength-reduce-2.ll?rev=65220&r1=65219&r2=65220&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/loop-strength-reduce-2.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/loop-strength-reduce-2.ll Fri Feb 20 21:35:52 2009
@@ -1,8 +1,10 @@
; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic | \
; RUN: grep {, 4} | count 1
+; RUN: llvm-as < %s | llc -march=x86 | not grep lea
;
; Make sure the common loop invariant A is hoisted up to preheader,
; since too many registers are needed to subsume it into the addressing modes.
+; It's safe to sink A into the loop when not compiling PIC.
@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/stride-nine-with-base-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/stride-nine-with-base-reg.ll?rev=65220&r1=65219&r2=65220&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/stride-nine-with-base-reg.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/stride-nine-with-base-reg.ll Fri Feb 20 21:35:52 2009
@@ -1,14 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | grep lea | count 1
+; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | not grep lea
; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
-; For x86 there's an lea above the loop. In both cases, there shouldn't
-; be any lea instructions inside the loop.
+; _P should be sunk into the loop and folded into the address mode. There
+; shouldn't be any lea instructions inside the loop.
@B = external global [1000 x i8], align 32
@A = external global [1000 x i8], align 32
@P = external global [1000 x i8], align 32
-define void @foo(i32 %m, i32 %p) {
+define void @foo(i32 %m, i32 %p) nounwind {
entry:
%tmp1 = icmp sgt i32 %m, 0
br i1 %tmp1, label %bb, label %return