[llvm-commits] [llvm] r60608 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86Subtarget.cpp lib/Transforms/Scalar/LoopStrengthReduce.cpp test/CodeGen/X86/loop-strength-reduce-2.ll test/CodeGen/X86/loop-strength-reduce-3.ll test/CodeGen/X86/loop-strength-reduce.ll

Dale Johannesen dalej at apple.com
Fri Dec 5 13:47:28 PST 2008


Author: johannes
Date: Fri Dec  5 15:47:27 2008
New Revision: 60608

URL: http://llvm.org/viewvc/llvm-project?rev=60608&view=rev
Log:
Make LoopStrengthReduce smarter about hoisting things out of
loops: it now leaves them in place when they can be subsumed
into addressing modes.

Change the X86 addressing mode check to recognize that
some PIC references need an extra register.
(I believe this is correct for Linux; if not, I'm sure
someone will tell me.)
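
In rough C terms, the new tests below boil down to a loop like this
(a sketch for illustration, not part of the patch):

    int A[16][16];
    void test(int row, int N) {
      for (int i = 0; i < N; ++i) {
        A[row][i + 1] = 4;   /* address: A + 64*row + 4*(i+1) */
        A[row][i + 2] = 5;   /* address: A + 64*row + 4*(i+2) */
      }
    }

With static relocations each store can encode A directly in its memory
operand as a displacement, so hoisting A into a register buys nothing.
Under PIC, referencing A consumes a register (the PIC base), and
together with the row offset and the loop index that is more than one
x86 memory operand can hold, so hoisting A to the preheader wins.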


Added:
    llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll
    llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=60608&r1=60607&r2=60608&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec  5 15:47:27 2008
@@ -6463,6 +6463,10 @@
     // We can only fold this if we don't need an extra load.
     if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
       return false;
+    // If BaseGV requires a register, we cannot also have a BaseReg.
+    if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
+        AM.HasBaseReg)
+      return false;
 
     // X86-64 only supports addr of globals in small code model.
     if (Subtarget->is64Bit()) {

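The check above reflects the shape of an x86 memory operand,
Disp(Base,Index,Scale), which has room for one global symbol in Disp
and one base register.  A hedged sketch of the query, using the same
TargetLowering::AddrMode fields the patch touches:

    TargetLowering::AddrMode AM;
    AM.BaseGV = GV;         // loop-invariant global in the address
    AM.HasBaseReg = true;   // a base register is already spoken for
    bool Fits = TLI->isLegalAddressingMode(AM, UseTy);

When GV itself must be formed off the PIC register, it competes for the
base-register slot, so with this patch the query returns false.
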
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=60608&r1=60607&r2=60608&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Fri Dec  5 15:47:27 2008
@@ -59,7 +59,23 @@
       return (GV->hasDLLImportLinkage());
     }
   }
-  
+  return false;
+}
+
+/// True if accessing the GV requires a register.  This is a superset of the
+/// cases where GVRequiresExtraLoad is true.  Some variations of PIC require
+/// a register, but not an extra load.
+bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV,
+                                       const TargetMachine& TM,
+                                       bool isDirectCall) const
+{
+  if (GVRequiresExtraLoad(GV, TM, isDirectCall))
+    return true;
+  // Code below here need only consider cases where GVRequiresExtraLoad
+  // returns false.
+  if (TM.getRelocationModel() == Reloc::PIC_)
+    return !isDirectCall && 
+      (GV->hasInternalLinkage() || GV->hasExternalLinkage());
   return false;
 }
 

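A hedged summary of the three cases GVRequiresRegister now separates
(i386 ELF-style PIC assumed; the assembly is illustrative):

    // static:         movl $0, A                 no register, no load
    // PIC, internal:  movl $0, A@GOTOFF(%ebx)    PIC register, no extra load
    // PIC, external:  movl A@GOT(%ebx), %eax     PIC register + extra load
    //                 movl $0, (%eax)

GVRequiresExtraLoad is true only for the last case; GVRequiresRegister
is true for both PIC cases, which is exactly the superset the new
comment describes.
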
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=60608&r1=60607&r2=60608&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Fri Dec  5 15:47:27 2008
@@ -444,7 +444,33 @@
   return true;
 }
 
-  
+/// isAddressUse - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+  bool isAddress = isa<LoadInst>(Inst);
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+    if (SI->getOperand(1) == OperandVal)
+      isAddress = true;
+  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+    // Addressing modes can also be folded into prefetches and a variety
+    // of intrinsics.
+    switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::prefetch:
+      case Intrinsic::x86_sse2_loadu_dq:
+      case Intrinsic::x86_sse2_loadu_pd:
+      case Intrinsic::x86_sse_loadu_ps:
+      case Intrinsic::x86_sse_storeu_ps:
+      case Intrinsic::x86_sse2_storeu_pd:
+      case Intrinsic::x86_sse2_storeu_dq:
+      case Intrinsic::x86_sse2_storel_dq:
+        if (II->getOperand(1) == OperandVal)
+          isAddress = true;
+        break;
+    }
+  }
+  return isAddress;
+}
 
 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
@@ -731,15 +757,16 @@
 }
 
 
-/// isTargetConstant - Return true if the following can be referenced by the
-/// immediate field of a target instruction.
-static bool isTargetConstant(const SCEVHandle &V, const Type *UseTy,
-                             const TargetLowering *TLI) {
+/// fitsInAddressMode - Return true if V can be subsumed within an addressing
+/// mode, and does not need to be put in a register first.
+static bool fitsInAddressMode(const SCEVHandle &V, const Type *UseTy,
+                             const TargetLowering *TLI, bool HasBaseReg) {
   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
     int64_t VC = SC->getValue()->getSExtValue();
     if (TLI) {
       TargetLowering::AddrMode AM;
       AM.BaseOffs = VC;
+      AM.HasBaseReg = HasBaseReg;
       return TLI->isLegalAddressingMode(AM, UseTy);
     } else {
       // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
@@ -754,6 +781,7 @@
         if (GlobalValue *GV = dyn_cast<GlobalValue>(Op0)) {
           TargetLowering::AddrMode AM;
           AM.BaseGV = GV;
+          AM.HasBaseReg = HasBaseReg;
           return TLI->isLegalAddressingMode(AM, UseTy);
         }
       }
@@ -846,7 +874,7 @@
     return;
   } else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
     // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
-    if (isAddress && isTargetConstant(SME->getOperand(0), UseTy, TLI) &&
+    if (isAddress && fitsInAddressMode(SME->getOperand(0), UseTy, TLI, false) &&
         SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
 
       SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType());
@@ -859,7 +887,7 @@
         // Scale SubImm up by "8".  If the result is a target constant, we are
         // good.
         SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
-        if (isTargetConstant(SubImm, UseTy, TLI)) {
+        if (fitsInAddressMode(SubImm, UseTy, TLI, false)) {
           // Accumulate the immediate.
           Imm = SE->getAddExpr(Imm, SubImm);
           
@@ -873,7 +901,7 @@
 
   // Loop-variant expressions must stay in the immediate field of the
   // expression.
-  if ((isAddress && isTargetConstant(Val, UseTy, TLI)) ||
+  if ((isAddress && fitsInAddressMode(Val, UseTy, TLI, false)) ||
       !Val->isLoopInvariant(L)) {
     Imm = SE->getAddExpr(Imm, Val);
     Val = SE->getIntegerSCEV(0, Val->getType());
@@ -912,21 +940,28 @@
   }
 }
 
-
-/// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases,
-/// removing any common subexpressions from it.  Anything truly common is
-/// removed, accumulated, and returned.  This looks for things like (a+b+c) and
+// This is logically local to the following function, but C++ says we have 
+// to make it file scope.
+struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
+
+/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
+/// the Uses, removing any common subexpressions, except that if all such
+/// subexpressions can be folded into an addressing mode for all uses inside
+/// the loop (this case is referred to as "free" in comments herein) we do
+/// not remove anything.  This looks for things like (a+b+c) and
 /// (a+c+d) and computes the common (a+c) subexpression.  The common expression
 /// is *removed* from the Bases and returned.
 static SCEVHandle 
 RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
-                                    ScalarEvolution *SE, Loop *L) {
+                                    ScalarEvolution *SE, Loop *L,
+                                    const TargetLowering *TLI) {
   unsigned NumUses = Uses.size();
 
   // Only one use?  This is a very common case, so we handle it specially and
   // cheaply.
   SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
   SCEVHandle Result = Zero;
+  SCEVHandle FreeResult = Zero;
   if (NumUses == 1) {
     // If the use is inside the loop, use its base, regardless of what it is:
     // it is clearly shared across all the IV's.  If the use is outside the loop
@@ -939,7 +974,10 @@
 
   // To find common subexpressions, count how many of Uses use each expression.
   // If any subexpressions are used Uses.size() times, they are common.
-  std::map<SCEVHandle, unsigned> SubExpressionUseCounts;
+  // Also track whether all uses of each expression can be moved into
+  // an addressing mode "for free"; such expressions are left within the loop.
+  // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
+  std::map<SCEVHandle, SubExprUseData> SubExpressionUseData;
   
   // UniqueSubExprs - Keep track of all of the subexpressions we see in the
   // order we see them.
@@ -962,31 +1000,89 @@
     // CSEs we can find.
     if (Uses[i].Base == Zero) return Zero;
 
+    // If this use is as an address we may be able to put CSEs in the addressing
+    // mode rather than hoisting them.
+    bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
+    // We may need the UseTy below, but only when isAddrUse, so compute it
+    // only in that case.
+    const Type *UseTy = 0;
+    if (isAddrUse) {
+      UseTy  = Uses[i].Inst->getType();
+      if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
+        UseTy = SI->getOperand(0)->getType();
+    }
+
     // Split the expression into subexprs.
     SeparateSubExprs(SubExprs, Uses[i].Base, SE);
-    // Add one to SubExpressionUseCounts for each subexpr present.
-    for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
-      if (++SubExpressionUseCounts[SubExprs[j]] == 1)
+    // Add one to SubExpressionUseData.Count for each subexpr present, and
+    // if the subexpr is not a valid immediate within an addressing mode use,
+    // set SubExpressionUseData.notAllUsesAreFree.  We definitely want to
+    // hoist these out of the loop (if they are common to all uses).
+    for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
+      if (++SubExpressionUseData[SubExprs[j]].Count == 1)
         UniqueSubExprs.push_back(SubExprs[j]);
+      if (!isAddrUse || !fitsInAddressMode(SubExprs[j], UseTy, TLI, false))
+        SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
+    }
     SubExprs.clear();
   }
 
   // Now that we know how many times each is used, build Result.  Iterate over
   // UniqueSubexprs so that we have a stable ordering.
   for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
-    std::map<SCEVHandle, unsigned>::iterator I = 
-       SubExpressionUseCounts.find(UniqueSubExprs[i]);
-    assert(I != SubExpressionUseCounts.end() && "Entry not found?");
-    if (I->second == NumUsesInsideLoop)   // Found CSE!
-      Result = SE->getAddExpr(Result, I->first);
-    else
-      // Remove non-cse's from SubExpressionUseCounts.
-      SubExpressionUseCounts.erase(I);
+    std::map<SCEVHandle, SubExprUseData>::iterator I = 
+       SubExpressionUseData.find(UniqueSubExprs[i]);
+    assert(I != SubExpressionUseData.end() && "Entry not found?");
+    if (I->second.Count == NumUsesInsideLoop) { // Found CSE! 
+      if (I->second.notAllUsesAreFree)
+        Result = SE->getAddExpr(Result, I->first);
+      else 
+        FreeResult = SE->getAddExpr(FreeResult, I->first);
+    } else
+      // Remove non-cse's from SubExpressionUseData.
+      SubExpressionUseData.erase(I);
+  }
+
+  if (FreeResult != Zero) {
+    // We have some subexpressions that can be subsumed into addressing
+    // modes in every use inside the loop.  However, it's possible that
+    // there are so many of them that the combined FreeResult cannot
+    // be subsumed, or that the target cannot handle both a FreeResult
+    // and a Result in the same instruction (for example because it would
+    // require too many registers).  Check this.
+    for (unsigned i=0; i<NumUses; ++i) {
+      if (!L->contains(Uses[i].Inst->getParent()))
+        continue;
+      // We know this is an addressing mode use; if there are any uses that
+      // are not, FreeResult would be Zero.
+      const Type *UseTy = Uses[i].Inst->getType();
+      if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
+        UseTy = SI->getOperand(0)->getType();
+      if (!fitsInAddressMode(FreeResult, UseTy, TLI, Result!=Zero)) {
+        // FIXME:  could split up FreeResult into pieces here, some hoisted
+        // and some not.  Doesn't seem worth it for now.
+        Result = SE->getAddExpr(Result, FreeResult);
+        FreeResult = Zero;
+        break;
+      }
+    }
   }
-  
+
   // If we found no CSE's, return now.
   if (Result == Zero) return Result;
   
+  // If we still have a FreeResult, remove its subexpressions from
+  // SubExpressionUseData.  This means they will remain in the use Bases.
+  if (FreeResult != Zero) {
+    SeparateSubExprs(SubExprs, FreeResult, SE);
+    for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
+      std::map<SCEVHandle, SubExprUseData>::iterator I = 
+         SubExpressionUseData.find(SubExprs[j]);
+      SubExpressionUseData.erase(I);
+    }
+    SubExprs.clear();
+  }
+
   // Otherwise, remove all of the CSE's we found from each of the base values.
   for (unsigned i = 0; i != NumUses; ++i) {
     // Uses outside the loop don't necessarily include the common base, but
@@ -1003,7 +1099,7 @@
 
     // Remove any common subexpressions.
     for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
-      if (SubExpressionUseCounts.count(SubExprs[j])) {
+      if (SubExpressionUseData.count(SubExprs[j])) {
         SubExprs.erase(SubExprs.begin()+j);
         --j; --e;
       }
@@ -1131,34 +1227,6 @@
   return SC->getValue()->getValue().isNegative();
 }
 
-/// isAddress - Returns true if the specified instruction is using the
-/// specified value as an address.
-static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
-  bool isAddress = isa<LoadInst>(Inst);
-  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-    if (SI->getOperand(1) == OperandVal)
-      isAddress = true;
-  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-    // Addressing modes can also be folded into prefetches and a variety
-    // of intrinsics.
-    switch (II->getIntrinsicID()) {
-      default: break;
-      case Intrinsic::prefetch:
-      case Intrinsic::x86_sse2_loadu_dq:
-      case Intrinsic::x86_sse2_loadu_pd:
-      case Intrinsic::x86_sse_loadu_ps:
-      case Intrinsic::x86_sse_storeu_ps:
-      case Intrinsic::x86_sse2_storeu_pd:
-      case Intrinsic::x86_sse2_storeu_dq:
-      case Intrinsic::x86_sse2_storel_dq:
-        if (II->getOperand(1) == OperandVal)
-          isAddress = true;
-        break;
-    }
-  }
-  return isAddress;
-}
-
 // CollectIVUsers - Transform our list of users and offsets to a bit more
 // complex table. In this new vector, each 'BasedUser' contains 'Base', the base
 // of the strided accesses, as well as the old information from Uses. We
@@ -1190,7 +1258,7 @@
   // "A+B"), emit it to the preheader, then remove the expression from the
   // UsersToProcess base values.
   SCEVHandle CommonExprs =
-    RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L);
+    RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
 
   // Next, figure out what we can represent in the immediate fields of
   // instructions.  If we can represent anything there, move it to the imm
@@ -1347,7 +1415,8 @@
     Constant *C = dyn_cast<Constant>(CommonBaseV);
     if (!C ||
         (!C->isNullValue() &&
-         !isTargetConstant(SE->getUnknown(CommonBaseV), ReplacedTy, TLI)))
+         !fitsInAddressMode(SE->getUnknown(CommonBaseV), ReplacedTy, 
+                           TLI, false)))
       // We want the common base emitted into the preheader! This is just
       // using cast as a copy so BitCast (no-op cast) is appropriate
       CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(), 
@@ -1403,7 +1472,8 @@
     // this by forcing a BitCast (noop cast) to be inserted into the preheader 
     // in this case.
     if (Constant *C = dyn_cast<Constant>(BaseV)) {
-      if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) {
+      if (!C->isNullValue() && !fitsInAddressMode(Base, ReplacedTy, 
+                                                 TLI, false)) {
         // We want this constant emitted into the preheader! This is just
         // using cast as a copy so BitCast (no-op cast) is appropriate
         BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",

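Condensed, the new flow in RemoveCommonExpressionsFromUseBases looks
like this (a paraphrase of the hunks above, not verbatim code):

    // For every use inside the loop, classify each subexpression of its
    // base.  UseTy is the loaded/stored type when the use is an address.
    for (unsigned i = 0; i != NumUses; ++i) {
      bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
      for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
        ++SubExpressionUseData[SubExprs[j]].Count;
        if (!isAddrUse || !fitsInAddressMode(SubExprs[j], UseTy, TLI, false))
          SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
      }
    }

Common subexpressions (Count == NumUsesInsideLoop) that are free in
every use accumulate into FreeResult and stay in the loop, folded into
addressing modes; the rest go into Result and are hoisted to the
preheader as before.  If the combined FreeResult itself no longer fits,
or collides with Result's need for a base register, it is folded back
into Result and hoisted after all.
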
Added: llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll?rev=60608&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll (added)
+++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll Fri Dec  5 15:47:27 2008
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic | \
+; RUN:   grep {A-} | count 1
+;
+; Make sure the common loop invariant A is hoisted up to the preheader,
+; since too many registers are needed to subsume it into the addressing modes.
+
+ at A = global [16 x [16 x i32]] zeroinitializer, align 32		; <[16 x [16 x i32]]*> [#uses=2]
+
+define void @test(i32 %row, i32 %N.in) nounwind {
+entry:
+	%N = bitcast i32 %N.in to i32		; <i32> [#uses=1]
+	%tmp5 = icmp sgt i32 %N.in, 0		; <i1> [#uses=1]
+	br i1 %tmp5, label %cond_true, label %return
+
+cond_true:		; preds = %cond_true, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%i.0.0 = bitcast i32 %indvar to i32		; <i32> [#uses=2]
+	%tmp2 = add i32 %i.0.0, 1		; <i32> [#uses=1]
+	%tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2		; <i32*> [#uses=1]
+	store i32 4, i32* %tmp
+	%tmp5.upgrd.1 = add i32 %i.0.0, 2		; <i32> [#uses=1]
+	%tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1		; <i32*> [#uses=1]
+	store i32 5, i32* %tmp7
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %cond_true
+
+return:		; preds = %cond_true, %entry
+	ret void
+}

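How the grep works, roughly: on the committer's Darwin host a PIC
reference to A is printed relative to the picbase label, so counting
lines that contain "A-" counts materializations of A.  A hedged sketch
of the expected shape (register names and labels are illustrative):

    leal    A-"L1$pb"(%esi), %edx    # the single "A-" match: A is hoisted
    ...                              # loop body: addresses built off %edx,
    movl    $4, 4(%edx,%eax,4)       # no further mention of A
    movl    $5, 8(%edx,%eax,4)
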
Added: llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll?rev=60608&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll (added)
+++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll Fri Dec  5 15:47:27 2008
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN:   grep {A+} | count 2
+;
+; Make sure the common loop invariant A is not hoisted up to the preheader,
+; since it can be subsumed into the addressing modes.
+
+ at A = global [16 x [16 x i32]] zeroinitializer, align 32		; <[16 x [16 x i32]]*> [#uses=2]
+
+define void @test(i32 %row, i32 %N.in) nounwind {
+entry:
+	%N = bitcast i32 %N.in to i32		; <i32> [#uses=1]
+	%tmp5 = icmp sgt i32 %N.in, 0		; <i1> [#uses=1]
+	br i1 %tmp5, label %cond_true, label %return
+
+cond_true:		; preds = %cond_true, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%i.0.0 = bitcast i32 %indvar to i32		; <i32> [#uses=2]
+	%tmp2 = add i32 %i.0.0, 1		; <i32> [#uses=1]
+	%tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2		; <i32*> [#uses=1]
+	store i32 4, i32* %tmp
+	%tmp5.upgrd.1 = add i32 %i.0.0, 2		; <i32> [#uses=1]
+	%tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1		; <i32*> [#uses=1]
+	store i32 5, i32* %tmp7
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %cond_true
+
+return:		; preds = %cond_true, %entry
+	ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll?rev=60608&r1=60607&r2=60608&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll (original)
+++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll Fri Dec  5 15:47:27 2008
@@ -1,7 +1,8 @@
-; RUN: llvm-as < %s | llc -march=x86 | \
-; RUN:   grep {A(} | count 1
+; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | \
+; RUN:   grep {A+} | count 2
 ;
-; Make sure the common loop invariant _A(reg) is hoisted up to preheader.
+; Make sure the common loop invariant A is not hoisted up to the preheader,
+; since it can be subsumed into the addressing mode in all uses.
 
 @A = internal global [16 x [16 x i32]] zeroinitializer, align 32		; <[16 x [16 x i32]]*> [#uses=2]
 

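For contrast with the PIC test, a hedged sketch of what this test and
loop-strength-reduce-3.ll expect (registers illustrative): A remains in
both stores as a plain displacement, which is what the two "A+" grep
matches count:

    movl    $4, A+4(%ecx,%eax,4)
    movl    $5, A+8(%ecx,%eax,4)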