[llvm-commits] [llvm] r61853 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/copy-aggregate.ll

Wed Jan 7 00:11:13 PST 2009

Author: lattner
Date: Wed Jan  7 02:11:13 2009
New Revision: 61853

URL: http://llvm.org/viewvc/llvm-project?rev=61853&view=rev
Log:
Implement the first half of PR3290: if there is a store of an 
integer to a (transitive) bitcast of the alloca and if that integer
has the full size of the alloca, then it clobbers the whole thing.
Handle this by extracting pieces out of the stored integer and 
filing them away in the SROA'd elements.

This triggers fairly frequently because the CFE uses integers to
pass small structs by value and the inliner exposes these.  For 
example, in kimwitu++, I see a bunch of these with i64 stores to
"%struct.std::pair<std::_Rb_tree_const_iterator<kc::impl_abstract_phylum*>,bool>"

In 176.gcc I see a few i32 stores to "%struct..0anon".

In the testcase, this is the difference between compiling test1 to:

_test1:
	subl	$12, %esp
	movl	20(%esp), %eax
	movl	%eax, 4(%esp)
	movl	16(%esp), %eax
	movl	%eax, (%esp)
	movl	(%esp), %eax
	addl	4(%esp), %eax
	addl	$12, %esp
	ret

vs:

_test1:
	movl	8(%esp), %eax
	addl	4(%esp), %eax
	ret

The second half of this will be to handle loads of the same form.



Added:
    llvm/trunk/test/Transforms/ScalarRepl/copy-aggregate.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=61853&r1=61852&r2=61853&view=diff

==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Wed Jan  7 02:11:13 2009
@@ -120,7 +120,8 @@
     void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                       AllocationInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
-
+    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
+                                       SmallVector<AllocaInst*, 32> &NewElts);
     
     const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial);
     void ConvertToScalar(AllocationInst *AI, const Type *Ty);
@@ -586,6 +587,18 @@
       isSafeUseOfBitCastedAllocation(BCU, AI, Info);
     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
       isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+      // If storing the entire alloca in one chunk through a bitcasted pointer
+      // to integer, we can transform it.  This happens (for example) when you
+      // cast a {i32,i32}* to i64* and store through it.  This is similar to the
+      // memcpy case and occurs in various "byval" cases and emulated memcpys.
+      if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
+          TD->getABITypeSize(SI->getOperand(0)->getType()) == 
+          TD->getABITypeSize(AI->getType()->getElementType())) {
+        Info.isMemCpyDst = true;
+        continue;
+      }
+      return MarkUnsafe(Info);
     } else {
       return MarkUnsafe(Info);
     }
@@ -603,7 +616,7 @@
     Instruction *User = cast<Instruction>(*UI++);
     if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
       RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
-      BCU->eraseFromParent();
+      if (BCU->use_empty()) BCU->eraseFromParent();
       continue;
     }
 
@@ -611,12 +624,17 @@
       // This must be memcpy/memmove/memset of the entire aggregate.
       // Split into one per element.
       RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
-      MI->eraseFromParent();
       continue;
     }
       
-    // If it's not a mem intrinsic, it must be some other user of a gep of the
-    // first pointer.  Just leave these alone.
+    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      // This must be a store of the entire alloca from an integer.
+      RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+      continue;
+    }
+    
+    // Otherwise it must be some other user of a gep of the first pointer.  Just
+    // leave these alone.
     continue;
   }      
 }
@@ -772,8 +790,118 @@
       CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
     }
   }
+  MI->eraseFromParent();
 }
+
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
+/// overwrites the entire allocation.  Extract out the pieces of the stored
+/// integer, store each to its alloca in NewElts, then erase the original store.
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
+                                         AllocationInst *AI,
+                                         SmallVector<AllocaInst*, 32> &NewElts){
+  // Extract each element out of the integer according to its structure offset
+  // and store the element value to the individual alloca.
+  Value *SrcVal = SI->getOperand(0);
+  const Type *AllocaEltTy = AI->getType()->getElementType();
+  uint64_t AllocaSizeBits = TD->getABITypeSizeInBits(AllocaEltTy);
+  
+  // If this isn't a store of an integer to the whole alloca, it may be a store
+  // to the first element.  Just ignore the store in this case and normal SROA
+  // will handle it.
+  if (!isa<IntegerType>(SrcVal->getType()) ||
+      TD->getABITypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+    return;
+
+  DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
+
+  // There are two forms here: AI could be an array or struct.  The two cases
+  // compute each element's bit offset within the integer differently.
+  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+    const StructLayout *Layout = TD->getStructLayout(EltSTy);
+    
+    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+      // Number of bits to shift SrcVal right so this field starts at bit 0.
+      const Type *FieldTy = EltSTy->getElementType(i);
+      uint64_t Shift = Layout->getElementOffsetInBits(i);
+      
+      if (TD->isBigEndian())
+        Shift = AllocaSizeBits-Shift-TD->getABITypeSizeInBits(FieldTy);
+      
+      Value *EltVal = SrcVal;
+      if (Shift) {
+        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+                                            "sroa.store.elt", SI);
+      }
+      
+      // Truncate down to an integer of the right size.
+      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+      if (FieldSizeBits != AllocaSizeBits)
+        EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI);
+      Value *DestField = NewElts[i];
+      if (EltVal->getType() == FieldTy) {
+        // Storing to an integer field of this size, just do it.
+      } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
+        // Bitcast to the right element type (for fp/vector values).
+        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+      } else {
+        // Otherwise (e.g. aggregates), bitcast the dest pointer to integer*.
+        DestField = new BitCastInst(DestField,
+                                    PointerType::getUnqual(EltVal->getType()),
+                                    "", SI);
+      }
+      new StoreInst(EltVal, DestField, SI);
+    }
+    
+  } else {
+    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+    const Type *ArrayEltTy = ATy->getElementType();
+    uint64_t ElementOffset = TD->getABITypeSizeInBits(ArrayEltTy);
+    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+
+    uint64_t Shift;
+    
+    if (TD->isBigEndian())
+      Shift = AllocaSizeBits-ElementOffset;
+    else 
+      Shift = 0;
+    
+    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+      
+      Value *EltVal = SrcVal;
+      if (Shift) {
+        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+                                            "sroa.store.elt", SI);
+      }
+      
+      // Truncate down to an integer of the right size.
+      if (ElementSizeBits != AllocaSizeBits)
+        EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI);
+      Value *DestField = NewElts[i];
+      if (EltVal->getType() == ArrayEltTy) {
+        // Storing to an integer element of this size, just do it.
+      } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
+        // Bitcast to the right element type (for fp/vector values).
+        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+      } else {
+        // Otherwise (e.g. aggregates), bitcast the dest pointer to integer*.
+        DestField = new BitCastInst(DestField,
+                                    PointerType::getUnqual(EltVal->getType()),
+                                    "", SI);
+      }
+      new StoreInst(EltVal, DestField, SI);
+      
+      if (TD->isBigEndian())
+        Shift -= ElementOffset;
+      else 
+        Shift += ElementOffset;
+    }
+  }
   
+  SI->eraseFromParent();
+}
+
 
 /// HasPadding - Return true if the specified type has any structure or
 /// alignment padding, false otherwise.

Added: llvm/trunk/test/Transforms/ScalarRepl/copy-aggregate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/copy-aggregate.ll?rev=61853&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/copy-aggregate.ll (added)
+++ llvm/trunk/test/Transforms/ScalarRepl/copy-aggregate.ll Wed Jan  7 02:11:13 2009
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; PR3290
+
+;; Store of integer to whole alloca struct.
+define i32 @test1(i64 %V) nounwind {
+	%X = alloca {{i32, i32}}
+	%Y = bitcast {{i32,i32}}* %X to i64*
+	store i64 %V, i64* %Y
+
+	%A = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 0
+	%B = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 1
+	%a = load i32* %A
+	%b = load i32* %B
+	%c = add i32 %a, %b
+	ret i32 %c
+}
+
+;; Store of integer to whole struct/array alloca.
+define float @test2(i128 %V) nounwind {
+	%X = alloca {[4 x float]}
+	%Y = bitcast {[4 x float]}* %X to i128*
+	store i128 %V, i128* %Y
+
+	%A = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 0
+	%B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3
+	%a = load float* %A
+	%b = load float* %B
+	%c = add float %a, %b
+	ret float %c
+}
+