r337887 - CodeGen: use non-zero memset when possible for automatic variables

JF Bastien via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 24 21:29:03 PDT 2018


Author: jfb
Date: Tue Jul 24 21:29:03 2018
New Revision: 337887

URL: http://llvm.org/viewvc/llvm-project?rev=337887&view=rev
Log:
CodeGen: use non-zero memset when possible for automatic variables

Summary:
Right now automatic variables are either initialized with bzero followed by a few stores, or memcpy'd from a synthesized global. We end up encountering a fair amount of code where memcpy of non-zero byte patterns would be better than memcpy from a global because it touches less memory and generates a smaller binary. The optimizer could reason about this, but it's not really worth it when clang already knows.

This code could definitely be more clever but I'm not sure it's worth it. In particular we could track a histogram of bytes seen and figure out (as we do with bzero) if a memset could be followed by a handful of stores. Similarly, we could tune the heuristics for GlobalSize, but using the same as for bzero seems conservatively OK for now.

<rdar://problem/42563091>

Reviewers: dexonsmith

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D49771

Modified:
    cfe/trunk/lib/CodeGen/CGDecl.cpp
    cfe/trunk/test/CodeGen/init.c

Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=337887&r1=337886&r2=337887&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGDecl.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp Tue Jul 24 21:29:03 2018
@@ -948,6 +948,113 @@ static bool shouldUseBZeroPlusStoresToIn
          canEmitInitWithFewStoresAfterBZero(Init, StoreBudget);
 }
 
+/// A byte pattern.
+///
+/// Can be "any" pattern if the value was padding or known to be undef.
+/// Can be "none" pattern if a sequence doesn't exist.
+class BytePattern {
+  uint8_t Val;
+  enum class ValueType : uint8_t { Specific, Any, None } Type;
+  BytePattern(ValueType Type) : Type(Type) {}
+
+public:
+  BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {}
+  static BytePattern Any() { return BytePattern(ValueType::Any); }
+  static BytePattern None() { return BytePattern(ValueType::None); }
+  bool isAny() const { return Type == ValueType::Any; }
+  bool isNone() const { return Type == ValueType::None; }
+  bool isValued() const { return Type == ValueType::Specific; }
+  uint8_t getValue() const {
+    assert(isValued());
+    return Val;
+  }
+  BytePattern merge(const BytePattern Other) const {
+    if (isNone() || Other.isNone())
+      return None();
+    if (isAny())
+      return Other;
+    if (Other.isAny())
+      return *this;
+    if (getValue() == Other.getValue())
+      return *this;
+    return None();
+  }
+};
+
+/// Figures out whether the constant can be initialized with memset.
+static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) {
+  if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C))
+    return BytePattern(0x00);
+  if (isa<llvm::UndefValue>(C))
+    return BytePattern::Any();
+
+  if (isa<llvm::ConstantInt>(C)) {
+    auto *Int = cast<llvm::ConstantInt>(C);
+    if (Int->getBitWidth() % 8 != 0)
+      return BytePattern::None();
+    const llvm::APInt &Value = Int->getValue();
+    if (Value.isSplat(8))
+      return BytePattern(Value.getLoBits(8).getLimitedValue());
+    return BytePattern::None();
+  }
+
+  if (isa<llvm::ConstantFP>(C)) {
+    auto *FP = cast<llvm::ConstantFP>(C);
+    llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt();
+    if (Bits.getBitWidth() % 8 != 0)
+      return BytePattern::None();
+    if (!Bits.isSplat(8))
+      return BytePattern::None();
+    return BytePattern(Bits.getLimitedValue() & 0xFF);
+  }
+
+  if (isa<llvm::ConstantVector>(C)) {
+    llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue();
+    if (Splat)
+      return constantIsRepeatedBytePattern(Splat);
+    return BytePattern::None();
+  }
+
+  if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) {
+    BytePattern Pattern(BytePattern::Any());
+    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
+      llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I));
+      Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+      if (Pattern.isNone())
+        return Pattern;
+    }
+    return Pattern;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+          dyn_cast<llvm::ConstantDataSequential>(C)) {
+    BytePattern Pattern(BytePattern::Any());
+    for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+      llvm::Constant *Elt = CDS->getElementAsConstant(I);
+      Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
+      if (Pattern.isNone())
+        return Pattern;
+    }
+    return Pattern;
+  }
+
+  // BlockAddress, ConstantExpr, and everything else is scary.
+  return BytePattern::None();
+}
+
+/// Decide whether we should use memset to initialize a local variable instead
+/// of using a memcpy from a constant global. Assumes we've already decided to
+/// not user bzero.
+/// FIXME We could be more clever, as we are for bzero above, and generate
+///       memset followed by stores. It's unclear that's worth the effort.
+static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init,
+                                               uint64_t GlobalSize) {
+  uint64_t SizeLimit = 32;
+  if (GlobalSize <= SizeLimit)
+    return BytePattern::None();
+  return constantIsRepeatedBytePattern(Init);
+}
+
 /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
 /// variable declaration with auto, register, or no storage class specifier.
 /// These turn into simple stack objects, or GlobalValues depending on target.
@@ -1401,11 +1508,11 @@ void CodeGenFunction::EmitAutoVarInit(co
   if (Loc.getType() != BP)
     Loc = Builder.CreateBitCast(Loc, BP);
 
-  // If the initializer is all or mostly zeros, codegen with bzero then do a
-  // few stores afterward.
-  if (shouldUseBZeroPlusStoresToInitialize(
-          constant,
-          CGM.getDataLayout().getTypeAllocSize(constant->getType()))) {
+  // If the initializer is all or mostly the same, codegen with bzero / memset
+  // then do a few stores afterward.
+  uint64_t ConstantSize =
+      CGM.getDataLayout().getTypeAllocSize(constant->getType());
+  if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
     Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
                          isVolatile);
     // Zero and undef don't require a stores.
@@ -1414,28 +1521,36 @@ void CodeGenFunction::EmitAutoVarInit(co
         constant->getType()->getPointerTo(Loc.getAddressSpace()));
       emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
     }
-  } else {
-    // Otherwise, create a temporary global with the initializer then
-    // memcpy from the global to the alloca.
-    std::string Name = getStaticDeclName(CGM, D);
-    unsigned AS = CGM.getContext().getTargetAddressSpace(
-        CGM.getStringLiteralAddressSpace());
-    BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
-
-    llvm::GlobalVariable *GV =
-      new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true,
-                               llvm::GlobalValue::PrivateLinkage,
-                               constant, Name, nullptr,
-                               llvm::GlobalValue::NotThreadLocal, AS);
-    GV->setAlignment(Loc.getAlignment().getQuantity());
-    GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
-    Address SrcPtr = Address(GV, Loc.getAlignment());
-    if (SrcPtr.getType() != BP)
-      SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+    return;
+  }
 
-    Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
+  BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
+  if (!Pattern.isNone()) {
+    uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue();
+    Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
+                         isVolatile);
+    return;
   }
+
+  // Otherwise, create a temporary global with the initializer then
+  // memcpy from the global to the alloca.
+  std::string Name = getStaticDeclName(CGM, D);
+  unsigned AS = CGM.getContext().getTargetAddressSpace(
+      CGM.getStringLiteralAddressSpace());
+  BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
+
+  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+      CGM.getModule(), constant->getType(), true,
+      llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr,
+      llvm::GlobalValue::NotThreadLocal, AS);
+  GV->setAlignment(Loc.getAlignment().getQuantity());
+  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+  Address SrcPtr = Address(GV, Loc.getAlignment());
+  if (SrcPtr.getType() != BP)
+    SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+
+  Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
 }
 
 /// Emit an expression as an initializer for an object (variable, field, etc.)

Modified: cfe/trunk/test/CodeGen/init.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/init.c?rev=337887&r1=337886&r2=337887&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/init.c (original)
+++ cfe/trunk/test/CodeGen/init.c Tue Jul 24 21:29:03 2018
@@ -140,6 +140,72 @@ void test10(int X) {
   // CHECK: call void @bar
 }
 
+void nonzeroMemseti8() {
+  char arr[33] = { 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, };
+  // CHECK-LABEL: @nonzeroMemseti8(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 42, i32 33, i1 false)
+}
+
+void nonzeroMemseti16() {
+  unsigned short arr[17] = { 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, 0x4242, };
+  // CHECK-LABEL: @nonzeroMemseti16(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 66, i32 34, i1 false)
+}
+
+void nonzeroMemseti32() {
+  unsigned arr[9] = { 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, };
+  // CHECK-LABEL: @nonzeroMemseti32(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -16, i32 36, i1 false)
+}
+
+void nonzeroMemseti64() {
+  unsigned long long arr[7] = { 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA,  0xAAAAAAAAAAAAAAAA,  0xAAAAAAAAAAAAAAAA,  };
+  // CHECK-LABEL: @nonzeroMemseti64(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -86, i32 56, i1 false)
+}
+
+void nonzeroMemsetf32() {
+  float arr[9] = { 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, 0x1.cacacap+75, };
+  // CHECK-LABEL: @nonzeroMemsetf32(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 101, i32 36, i1 false)
+}
+
+void nonzeroMemsetf64() {
+  double arr[7] = { 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, 0x1.4444444444444p+69, };
+  // CHECK-LABEL: @nonzeroMemsetf64(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 68, i32 56, i1 false)
+}
+
+void nonzeroPaddedUnionMemset() {
+  union U { char c; int i; };
+  union U arr[9] = { 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, };
+  // CHECK-LABEL: @nonzeroPaddedUnionMemset(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -16, i32 36, i1 false)
+}
+
+void nonzeroNestedMemset() {
+  union U { char c; int i; };
+  struct S { union U u; short i; };
+  struct S arr[5] = { { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, };
+  // CHECK-LABEL: @nonzeroNestedMemset(
+  // CHECK-NOT: store
+  // CHECK-NOT: memcpy
+  // CHECK: call void @llvm.memset.p0i8.i32(i8* {{.*}}, i8 -16, i32 40, i1 false)
+}
 
 // PR9257
 struct test11S {




More information about the cfe-commits mailing list