[cfe-commits] r120692 - in /cfe/trunk: lib/CodeGen/CGDecl.cpp lib/CodeGen/CGExprAgg.cpp lib/CodeGen/CGValue.h lib/CodeGen/CodeGenFunction.h test/CodeGen/init.c

Chris Lattner sabre at nondot.org
Wed Dec 1 23:07:26 PST 2010


Author: lattner
Date: Thu Dec  2 01:07:26 2010
New Revision: 120692

URL: http://llvm.org/viewvc/llvm-project?rev=120692&view=rev
Log:
Improve codegen for initializer lists to use memset more aggressively
when an initializer is variable (I handled the constant case in a previous
patch).  This has three pieces:

1. Enhance AggValueSlot to have a 'isZeroed' bit to tell CGExprAgg that
   the memory being stored into has previously been memset to zero.
2. Teach CGExprAgg to not emit stores of zero to isZeroed memory.
3. Teach CodeGenFunction::EmitAggExpr to scan initializers to determine
   whether they are profitable to emit a memset + inividual stores vs
   stores for everything.

The heuristic used is that a global has to be more than 16 bytes and
has to be 3/4 zero to be candidate for this xform.  The two testcases
are illustrative of the scenarios this catches.  We now codegen test9 into:

 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 400, i32 4, i1 false)
 %.array = getelementptr inbounds [100 x i32]* %Arr, i32 0, i32 0
 %tmp = load i32* %X.addr, align 4
 store i32 %tmp, i32* %.array

and test10 into:

  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 392, i32 8, i1 false)
  %tmp = getelementptr inbounds %struct.b* %S, i32 0, i32 0
  %tmp1 = getelementptr inbounds %struct.a* %tmp, i32 0, i32 0
  %tmp2 = load i32* %X.addr, align 4
  store i32 %tmp2, i32* %tmp1, align 4
  %tmp5 = getelementptr inbounds %struct.b* %S, i32 0, i32 3
  %tmp10 = getelementptr inbounds %struct.a* %tmp5, i32 0, i32 4
  %tmp11 = load i32* %X.addr, align 4
  store i32 %tmp11, i32* %tmp10, align 4

Previously we produced 99 stores of zero for test9 and also tons for test10.
This xforms should substantially speed up -O0 builds when it kicks in as well
as reducing code size and optimizer heartburn on insane cases.  This resolves
PR279.


Modified:
    cfe/trunk/lib/CodeGen/CGDecl.cpp
    cfe/trunk/lib/CodeGen/CGExprAgg.cpp
    cfe/trunk/lib/CodeGen/CGValue.h
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/test/CodeGen/init.c

Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=120692&r1=120691&r2=120692&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGDecl.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp Thu Dec  2 01:07:26 2010
@@ -696,8 +696,7 @@
 
     // Get the element type.
     const llvm::Type *LElemTy = ConvertTypeForMem(Ty);
-    const llvm::Type *LElemPtrTy =
-      llvm::PointerType::get(LElemTy, Ty.getAddressSpace());
+    const llvm::Type *LElemPtrTy = LElemTy->getPointerTo(Ty.getAddressSpace());
 
     llvm::Value *VLASize = EmitVLASize(Ty);
 
@@ -860,7 +859,7 @@
     } else if (Init->getType()->isAnyComplexType()) {
       EmitComplexExprIntoAddr(Init, Loc, isVolatile);
     } else {
-      EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true));
+      EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true, false));
     }
   }
 

Modified: cfe/trunk/lib/CodeGen/CGExprAgg.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprAgg.cpp?rev=120692&r1=120691&r2=120692&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExprAgg.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExprAgg.cpp Thu Dec  2 01:07:26 2010
@@ -497,11 +497,42 @@
   EmitNullInitializationToLValue(CGF.MakeAddrLValue(Slot.getAddr(), T), T);
 }
 
+/// isSimpleZero - If emitting this value will obviously just cause a store of
+/// zero to memory, return true.  This can return false if uncertain, so it just
+/// handles simple cases.
+static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) {
+  // (0)
+  if (const ParenExpr *PE = dyn_cast<ParenExpr>(E))
+    return isSimpleZero(PE->getSubExpr(), CGF);
+  // 0
+  if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))
+    return IL->getValue() == 0;
+  // +0.0
+  if (const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(E))
+    return FL->getValue().isPosZero();
+  // int()
+  if ((isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) &&
+      CGF.getTypes().isZeroInitializable(E->getType()))
+    return true;
+  // (int*)0 - Null pointer expressions.
+  if (const CastExpr *ICE = dyn_cast<CastExpr>(E))
+    return ICE->getCastKind() == CK_NullToPointer;
+  // '\0'
+  if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E))
+    return CL->getValue() == 0;
+  
+  // Otherwise, hard case: conservatively return false.
+  return false;
+}
+
+
 void 
 AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, QualType T) {
   // FIXME: Ignore result?
   // FIXME: Are initializers affected by volatile?
-  if (isa<ImplicitValueInitExpr>(E)) {
+  if (Dest.isZeroed() && isSimpleZero(E, CGF)) {
+    // Storing "i32 0" to a zero'd memory location is a noop.
+  } else if (isa<ImplicitValueInitExpr>(E)) {
     EmitNullInitializationToLValue(LV, T);
   } else if (T->isReferenceType()) {
     RValue RV = CGF.EmitReferenceBindingToExpr(E, /*InitializedDecl=*/0);
@@ -509,13 +540,19 @@
   } else if (T->isAnyComplexType()) {
     CGF.EmitComplexExprIntoAddr(E, LV.getAddress(), false);
   } else if (CGF.hasAggregateLLVMType(T)) {
-    CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true));
+    CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true,
+                                             false, Dest.isZeroed()));
   } else {
     CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(E), LV, T);
   }
 }
 
 void AggExprEmitter::EmitNullInitializationToLValue(LValue LV, QualType T) {
+  // If the destination slot is already zeroed out before the aggregate is
+  // copied into it, we don't have to emit any zeros here.
+  if (Dest.isZeroed() && CGF.getTypes().isZeroInitializable(T))
+    return;
+  
   if (!CGF.hasAggregateLLVMType(T)) {
     // For non-aggregates, we can store zero
     llvm::Value *Null = llvm::Constant::getNullValue(CGF.ConvertType(T));
@@ -573,13 +610,27 @@
     // FIXME: were we intentionally ignoring address spaces and GC attributes?
 
     for (uint64_t i = 0; i != NumArrayElements; ++i) {
+      // If we're done emitting initializers and the destination is known-zeroed
+      // then we're done.
+      if (i == NumInitElements &&
+          Dest.isZeroed() &&
+          CGF.getTypes().isZeroInitializable(ElementType))
+        break;
+
       llvm::Value *NextVal = Builder.CreateStructGEP(DestPtr, i, ".array");
       LValue LV = CGF.MakeAddrLValue(NextVal, ElementType);
+      
       if (i < NumInitElements)
         EmitInitializationToLValue(E->getInit(i), LV, ElementType);
-
       else
         EmitNullInitializationToLValue(LV, ElementType);
+      
+      // If the GEP didn't get used because of a dead zero init or something
+      // else, clean it up for -O0 builds and general tidiness.
+      if (llvm::GetElementPtrInst *GEP =
+            dyn_cast<llvm::GetElementPtrInst>(NextVal))
+        if (GEP->use_empty())
+          GEP->eraseFromParent();
     }
     return;
   }
@@ -612,13 +663,13 @@
 
     // FIXME: volatility
     FieldDecl *Field = E->getInitializedFieldInUnion();
-    LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0);
 
+    LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0);
     if (NumInitElements) {
       // Store the initializer into the field
       EmitInitializationToLValue(E->getInit(0), FieldLoc, Field->getType());
     } else {
-      // Default-initialize to null
+      // Default-initialize to null.
       EmitNullInitializationToLValue(FieldLoc, Field->getType());
     }
 
@@ -638,10 +689,16 @@
     if (Field->isUnnamedBitfield())
       continue;
 
+    // Don't emit GEP before a noop store of zero.
+    if (CurInitVal == NumInitElements && Dest.isZeroed() &&
+        CGF.getTypes().isZeroInitializable(E->getType()))
+      break;
+    
     // FIXME: volatility
     LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, *Field, 0);
     // We never generate write-barries for initialized fields.
     FieldLoc.setNonGC(true);
+    
     if (CurInitVal < NumInitElements) {
       // Store the initializer into the field.
       EmitInitializationToLValue(E->getInit(CurInitVal++), FieldLoc,
@@ -650,6 +707,14 @@
       // We're out of initalizers; default-initialize to null
       EmitNullInitializationToLValue(FieldLoc, Field->getType());
     }
+    
+    // If the GEP didn't get used because of a dead zero init or something
+    // else, clean it up for -O0 builds and general tidiness.
+    if (FieldLoc.isSimple())
+      if (llvm::GetElementPtrInst *GEP =
+            dyn_cast<llvm::GetElementPtrInst>(FieldLoc.getAddress()))
+        if (GEP->use_empty())
+          GEP->eraseFromParent();
   }
 }
 
@@ -657,6 +722,69 @@
 //                        Entry Points into this File
 //===----------------------------------------------------------------------===//
 
+/// GetNumNonZeroBytesInInit - Get an approximate count of the number of
+/// non-zero bytes that will be stored when outputting the initializer for the
+/// specified initializer expression.
+static uint64_t GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
+  if (const ParenExpr *PE = dyn_cast<ParenExpr>(E))
+    return GetNumNonZeroBytesInInit(PE->getSubExpr(), CGF);
+
+  // 0 and 0.0 won't require any non-zero stores!
+  if (isSimpleZero(E, CGF)) return 0;
+
+  // If this is an initlist expr, sum up the size of sizes of the (present)
+  // elements.  If this is something weird, assume the whole thing is non-zero.
+  const InitListExpr *ILE = dyn_cast<InitListExpr>(E);
+  if (ILE == 0 || !CGF.getTypes().isZeroInitializable(ILE->getType()))
+    return CGF.getContext().getTypeSize(E->getType())/8;
+  
+  uint64_t NumNonZeroBytes = 0;
+  for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i)
+    NumNonZeroBytes += GetNumNonZeroBytesInInit(ILE->getInit(i), CGF);
+  return NumNonZeroBytes;
+}
+
+/// CheckAggExprForMemSetUse - If the initializer is large and has a lot of
+/// zeros in it, emit a memset and avoid storing the individual zeros.
+///
+static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E,
+                                     CodeGenFunction &CGF) {
+  // If the slot is already known to be zeroed, nothing to do.  Don't mess with
+  // volatile stores.
+  if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == 0) return;
+  
+  // If the type is 16-bytes or smaller, prefer individual stores over memset.
+  std::pair<uint64_t, unsigned> TypeInfo =
+    CGF.getContext().getTypeInfo(E->getType());
+  if (TypeInfo.first/8 <= 16)
+    return;
+
+  // Check to see if over 3/4 of the initializer are known to be zero.  If so,
+  // we prefer to emit memset + individual stores for the rest.
+  uint64_t NumNonZeroBytes = GetNumNonZeroBytesInInit(E, CGF);
+  if (NumNonZeroBytes*4 > TypeInfo.first/8)
+    return;
+  
+  // Okay, it seems like a good idea to use an initial memset, emit the call.
+  llvm::Constant *SizeVal = CGF.Builder.getInt64(TypeInfo.first/8);
+  llvm::ConstantInt *AlignVal = CGF.Builder.getInt32(TypeInfo.second/8);
+
+  llvm::Value *Loc = Slot.getAddr();
+  const llvm::Type *BP = llvm::Type::getInt8PtrTy(CGF.getLLVMContext());
+  
+  Loc = CGF.Builder.CreateBitCast(Loc, BP);
+  CGF.Builder.CreateCall5(CGF.CGM.getMemSetFn(Loc->getType(),
+                                              SizeVal->getType()),
+                          Loc, CGF.Builder.getInt8(0), SizeVal, AlignVal,
+                          CGF.Builder.getFalse());
+  
+  // Tell the AggExprEmitter that the slot is known zero.
+  Slot.setZeroed();
+}
+
+
+
+
 /// EmitAggExpr - Emit the computation of the specified expression of aggregate
 /// type.  The result is computed into DestPtr.  Note that if DestPtr is null,
 /// the value of the aggregate expression is not needed.  If VolatileDest is
@@ -670,20 +798,20 @@
                                   bool IgnoreResult) {
   assert(E && hasAggregateLLVMType(E->getType()) &&
          "Invalid aggregate expression to emit");
-  assert((Slot.getAddr() != 0 || Slot.isIgnored())
-         && "slot has bits but no address");
+  assert((Slot.getAddr() != 0 || Slot.isIgnored()) &&
+         "slot has bits but no address");
 
-  AggExprEmitter(*this, Slot, IgnoreResult)
-    .Visit(const_cast<Expr*>(E));
+  // Optimize the slot if possible.
+  CheckAggExprForMemSetUse(Slot, E, *this);
+ 
+  AggExprEmitter(*this, Slot, IgnoreResult).Visit(const_cast<Expr*>(E));
 }
 
 LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) {
   assert(hasAggregateLLVMType(E->getType()) && "Invalid argument!");
   llvm::Value *Temp = CreateMemTemp(E->getType());
   LValue LV = MakeAddrLValue(Temp, E->getType());
-  AggValueSlot Slot
-    = AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false);
-  EmitAggExpr(E, Slot);
+  EmitAggExpr(E, AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false));
   return LV;
 }
 

Modified: cfe/trunk/lib/CodeGen/CGValue.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGValue.h?rev=120692&r1=120691&r2=120692&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGValue.h (original)
+++ cfe/trunk/lib/CodeGen/CGValue.h Thu Dec  2 01:07:26 2010
@@ -336,6 +336,11 @@
   bool VolatileFlag : 1;
   bool LifetimeFlag : 1;
   bool RequiresGCollection : 1;
+  
+  /// IsZeroed - This is set to true if the destination is known to be zero
+  /// before the assignment into it.  This means that zero fields don't need to
+  /// be set.
+  bool IsZeroed : 1;
 
 public:
   /// ignored - Returns an aggregate value slot indicating that the
@@ -343,7 +348,7 @@
   static AggValueSlot ignored() {
     AggValueSlot AV;
     AV.Addr = 0;
-    AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = 0;
+    AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = AV.IsZeroed =0;
     return AV;
   }
 
@@ -357,17 +362,19 @@
   ///   somewhere that ObjC GC calls should be emitted for
   static AggValueSlot forAddr(llvm::Value *Addr, bool Volatile,
                               bool LifetimeExternallyManaged,
-                              bool RequiresGCollection=false) {
+                              bool RequiresGCollection = false,
+                              bool IsZeroed = false) {
     AggValueSlot AV;
     AV.Addr = Addr;
     AV.VolatileFlag = Volatile;
     AV.LifetimeFlag = LifetimeExternallyManaged;
     AV.RequiresGCollection = RequiresGCollection;
+    AV.IsZeroed = IsZeroed;
     return AV;
   }
 
   static AggValueSlot forLValue(LValue LV, bool LifetimeExternallyManaged,
-                                bool RequiresGCollection=false) {
+                                bool RequiresGCollection = false) {
     return forAddr(LV.getAddress(), LV.isVolatileQualified(),
                    LifetimeExternallyManaged, RequiresGCollection);
   }
@@ -399,6 +406,10 @@
     return RValue::getAggregate(getAddr(), isVolatile());
   }
   
+  void setZeroed(bool V = true) { IsZeroed = V; }
+  bool isZeroed() const {
+    return IsZeroed;
+  }
 };
 
 }  // end namespace CodeGen

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=120692&r1=120691&r2=120692&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Thu Dec  2 01:07:26 2010
@@ -1662,8 +1662,7 @@
                                   const BlockDeclRefExpr *BDRE);
 
   RValue EmitCXXExprWithTemporaries(const CXXExprWithTemporaries *E,
-                                    AggValueSlot Slot
-                                      = AggValueSlot::ignored());
+                                    AggValueSlot Slot =AggValueSlot::ignored());
 
   void EmitCXXThrowExpr(const CXXThrowExpr *E);
 

Modified: cfe/trunk/test/CodeGen/init.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/init.c?rev=120692&r1=120691&r2=120692&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/init.c (original)
+++ cfe/trunk/test/CodeGen/init.c Thu Dec  2 01:07:26 2010
@@ -70,3 +70,33 @@
 // CHECK: store i8 98
 // CHECK: store i8 99
 }
+
+void bar(void*);
+
+// PR279
+int test9(int X) {
+  int Arr[100] = { X };     // Should use memset
+  bar(Arr);
+// CHECK: @test9
+// CHECK: call void @llvm.memset
+// CHECK-NOT: store i32 0
+// CHECK: call void @bar
+}
+
+struct a {
+  int a, b, c, d, e, f, g, h, i, j, k, *p;
+};
+
+struct b {
+  struct a a,b,c,d,e,f,g;
+};
+
+int test10(int X) {
+  struct b S = { .a.a = X, .d.e = X, .f.e = 0, .f.f = 0, .f.p = 0 };
+  bar(&S);
+
+  // CHECK: @test10
+  // CHECK: call void @llvm.memset
+  // CHECK-NOT: store i32 0
+  // CHECK: call void @bar
+}





More information about the cfe-commits mailing list