[llvm-commits] [llvm] r123571 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/basictest.ll

Chris Lattner sabre at nondot.org
Sat Jan 15 22:18:29 PST 2011


Author: lattner
Date: Sun Jan 16 00:18:28 2011
New Revision: 123571

URL: http://llvm.org/viewvc/llvm-project?rev=123571&view=rev
Log:
If an alloca is only ever accessed as a unit, and is accessed with load/store instructions,
then don't try to decimate it into its individual pieces.  That just makes a mess of the
IR and is pointless if none of the elements are individually accessed.  The old behavior
generated really terrible code for std::bitset (PR8980), because clang happens to lower it
as an {[8 x i8]} structure instead of {i64}.
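
For context, here is a hypothetical reduction of the kind of source behind PR8980 (not the
reporter's code): the bitset's storage ends up as an aggregate-typed alloca, but it is only
ever stored to and loaded from as a whole 64-bit value.

#include <bitset>

// Hypothetical reduction of the PR8980 pattern: the std::bitset local is an
// aggregate-typed alloca that is only ever accessed as a unit, so splitting
// it into per-byte pieces buys nothing.
unsigned long roundtrip(unsigned long X) {
  std::bitset<64> B(X);  // whole-value store into the alloca
  return B.to_ulong();   // whole-value load back out; should fold to X
}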

The testcase is now optimized to:

define i64 @test2(i64 %X) {
  br label %L2

L2:                                               ; preds = %0
  ret i64 %X
}

Before this change, we generated:

define i64 @test2(i64 %X) {
  %sroa.store.elt = lshr i64 %X, 56
  %1 = trunc i64 %sroa.store.elt to i8
  %sroa.store.elt8 = lshr i64 %X, 48
  %2 = trunc i64 %sroa.store.elt8 to i8
  %sroa.store.elt9 = lshr i64 %X, 40
  %3 = trunc i64 %sroa.store.elt9 to i8
  %sroa.store.elt10 = lshr i64 %X, 32
  %4 = trunc i64 %sroa.store.elt10 to i8
  %sroa.store.elt11 = lshr i64 %X, 24
  %5 = trunc i64 %sroa.store.elt11 to i8
  %sroa.store.elt12 = lshr i64 %X, 16
  %6 = trunc i64 %sroa.store.elt12 to i8
  %sroa.store.elt13 = lshr i64 %X, 8
  %7 = trunc i64 %sroa.store.elt13 to i8
  %8 = trunc i64 %X to i8
  br label %L2

L2:                                               ; preds = %0
  %9 = zext i8 %1 to i64
  %10 = shl i64 %9, 56
  %11 = zext i8 %2 to i64
  %12 = shl i64 %11, 48
  %13 = or i64 %12, %10
  %14 = zext i8 %3 to i64
  %15 = shl i64 %14, 40
  %16 = or i64 %15, %13
  %17 = zext i8 %4 to i64
  %18 = shl i64 %17, 32
  %19 = or i64 %18, %16
  %20 = zext i8 %5 to i64
  %21 = shl i64 %20, 24
  %22 = or i64 %21, %19
  %23 = zext i8 %6 to i64
  %24 = shl i64 %23, 16
  %25 = or i64 %24, %22
  %26 = zext i8 %7 to i64
  %27 = shl i64 %26, 8
  %28 = or i64 %27, %25
  %29 = zext i8 %8 to i64
  %30 = or i64 %29, %28
  ret i64 %30
}

In this case, instcombine was able to eliminate the nonsense, but in PR8980 enough
PHIs are in play that instcombine backs off.  It's better not to generate this stuff
in the first place.
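
For clarity, here is a condensed, hand-written sketch of the shape of the new gate, using
standalone booleans instead of the real AllocaInfo fields; the actual change is in the
ScalarReplAggregates.cpp hunk below.

// Condensed sketch of the new decision (not the literal code; see the diff
// below).  Element-wise SROA is skipped only when every access covers the
// whole aggregate, at least one of those accesses is a plain load or store,
// and the aggregate has more than one element; in that case the alloca is
// left whole so it can be promoted as a single integer instead.
static bool shouldSplitIntoElements(bool HasSubelementAccess,
                                    bool HasALoadOrStore,
                                    unsigned NumElements) {
  if (!HasSubelementAccess && HasALoadOrStore && NumElements > 1)
    return false; // leave the alloca whole; ConvertToScalarInfo handles it
  return true;    // otherwise fall through to per-element replacement
}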


Modified:
    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
    llvm/trunk/test/Transforms/ScalarRepl/basictest.ll

Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=123571&r1=123570&r2=123571&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Sun Jan 16 00:18:28 2011
@@ -88,8 +88,19 @@
       /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
       bool isMemCpyDst : 1;
 
+      /// hasSubelementAccess - This is true if a subelement of the alloca is
+      /// ever accessed, or false if the alloca is only accessed with mem
+      /// intrinsics or load/store that only access the entire alloca at once.
+      bool hasSubelementAccess : 1;
+      
+      /// hasALoadOrStore - This is true if there are any loads or stores to it.
+      /// The alloca may just be accessed with memcpy, for example, which would
+      /// not set this.
+      bool hasALoadOrStore : 1;
+      
       AllocaInfo()
-        : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
+        : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+          hasSubelementAccess(false), hasALoadOrStore(false) {}
     };
 
     unsigned SRThreshold;
@@ -1103,6 +1114,7 @@
         const Type *LIType = LI->getType();
         isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
                         LIType, false, Info);
+        Info.hasALoadOrStore = true;
       } else
         MarkUnsafe(Info);
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -1111,6 +1123,7 @@
         const Type *SIType = SI->getOperand(0)->getType();
         isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
                         SIType, true, Info);
+        Info.hasALoadOrStore = true;
       } else
         MarkUnsafe(Info);
     } else {
@@ -1217,13 +1230,17 @@
     // This is also safe for references using a type that is compatible with
     // the type of the alloca, so that loads/stores can be rewritten using
     // insertvalue/extractvalue.
-    if (isCompatibleAggregate(MemOpType, AI->getAllocatedType()))
+    if (isCompatibleAggregate(MemOpType, AI->getAllocatedType())) {
+      Info.hasSubelementAccess = true;
       return;
+    }
   }
   // Check if the offset/size correspond to a component within the alloca type.
   const Type *T = AI->getAllocatedType();
-  if (TypeHasComponent(T, Offset, MemSize))
+  if (TypeHasComponent(T, Offset, MemSize)) {
+    Info.hasSubelementAccess = true;
     return;
+  }
 
   return MarkUnsafe(Info);
 }
@@ -1851,6 +1868,19 @@
       HasPadding(AI->getAllocatedType(), *TD))
     return false;
 
+  // If the alloca never has an access to just *part* of it, but is accessed
+  // with loads and stores, then we should use ConvertToScalarInfo to promote
+  // the alloca instead of promoting each piece at a time and inserting fission
+  // and fusion code.
+  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+    // If the struct/array just has one element, use basic SRoA.
+    if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+      if (ST->getNumElements() > 1) return false;
+    } else {
+      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+        return false;
+    }
+  }
   return true;
 }
 

Modified: llvm/trunk/test/Transforms/ScalarRepl/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/basictest.ll?rev=123571&r1=123570&r2=123571&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/basictest.ll (original)
+++ llvm/trunk/test/Transforms/ScalarRepl/basictest.ll Sun Jan 16 00:18:28 2011
@@ -1,11 +1,30 @@
-; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
-define i32 @test() {
+define i32 @test1() {
 	%X = alloca { i32, float }		; <{ i32, float }*> [#uses=1]
 	%Y = getelementptr { i32, float }* %X, i64 0, i32 0		; <i32*> [#uses=2]
 	store i32 0, i32* %Y
 	%Z = load i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
+; CHECK: @test1
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+}
+
+; PR8980
+define i64 @test2(i64 %X) {
+	%A = alloca [8 x i8]
+	%B = bitcast [8 x i8]* %A to i64*
+
+	store i64 %X, i64* %B
+	br label %L2
+
+L2:
+	%Z = load i64* %B		; <i64> [#uses=1]
+	ret i64 %Z
+; CHECK: @test2
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
 }
 




