[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 1 08:52:39 PST 2024
https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/79502
>From 1010379cd5e871fc404e58dbce348e953ebaf75c Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
partially initialized array
Fix #79500
---
clang/lib/CodeGen/CGDecl.cpp | 101 ++++++++++++++----
.../test/CodeGenOpenCL/partial_initializer.cl | 3 +-
2 files changed, 85 insertions(+), 19 deletions(-)
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c1724..a9ae06bd77c32 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,11 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
}
+static bool isNullOrUndef(llvm::Constant *C) {
+ return C->isNullValue() || isa<llvm::ConstantAggregateZero>(C) ||
+ isa<llvm::ConstantPointerNull>(C) || isa<llvm::UndefValue>(C);
+}
+
/// Decide whether we can emit the non-zero parts of the specified initializer
/// with equal or fewer than NumStores scalar stores.
static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -945,48 +950,90 @@ static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
/// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
/// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
- llvm::Constant *Init, Address Loc,
- bool isVolatile, CGBuilderTy &Builder,
- bool IsAutoInit) {
+static uint64_t emitStoresForInitAfterBZero(CodeGenModule &CGM,
+ llvm::Constant *Init, Address Loc,
+ bool isVolatile,
+ CGBuilderTy &Builder,
+ bool IsAutoInit) {
assert(!Init->isNullValue() && !isa<llvm::UndefValue>(Init) &&
"called emitStoresForInitAfterBZero for zero or undef value.");
+ auto const &DL = CGM.getDataLayout();
+
if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) ||
isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) ||
isa<llvm::ConstantExpr>(Init)) {
auto *I = Builder.CreateStore(Init, Loc, isVolatile);
if (IsAutoInit)
I->addAnnotationMetadata("auto-init");
- return;
+ return DL.getTypeAllocSize(Init->getType());
}
if (llvm::ConstantDataSequential *CDS =
dyn_cast<llvm::ConstantDataSequential>(Init)) {
+ bool CountNonNullBytes = true;
+ uint64_t LeadingNonNullElementsCount = 0;
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
llvm::Constant *Elt = CDS->getElementAsConstant(i);
// If necessary, get a pointer to the element and emit it.
- if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
+ if (!isNullOrUndef(Elt)) {
emitStoresForInitAfterBZero(
CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile,
Builder, IsAutoInit);
+ if (CountNonNullBytes)
+ ++LeadingNonNullElementsCount;
+ } else if (CountNonNullBytes)
+ CountNonNullBytes = false;
}
- return;
+ uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+ return LeadingNonNullElementsCount * ElementByteCount;
}
assert((isa<llvm::ConstantStruct>(Init) || isa<llvm::ConstantArray>(Init)) &&
"Unknown value type!");
+ bool CountNonNullBytes = true;
+ uint64_t Offset = DL.getTypeAllocSize(Init->getType());
+
for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
- llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i));
+ llvm::Constant *Operand = cast<llvm::Constant>(Init->getOperand(i));
+ uint64_t OperandByteCount = DL.getTypeAllocSize(Operand->getType());
- // If necessary, get a pointer to the element and emit it.
- if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
- emitStoresForInitAfterBZero(CGM, Elt,
- Builder.CreateConstInBoundsGEP2_32(Loc, 0, i),
- isVolatile, Builder, IsAutoInit);
+ uint64_t OperandOffset;
+ if (isNullOrUndef(Operand)) {
+ OperandOffset = 0;
+ } else {
+ // If necessary, get a pointer to the element and emit it.
+ OperandOffset = emitStoresForInitAfterBZero(
+ CGM, Operand, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i),
+ isVolatile, Builder, IsAutoInit);
+ }
+
+ if (CountNonNullBytes) {
+ if (OperandOffset != OperandByteCount) {
+ CountNonNullBytes = false;
+
+ // When not at the beginning of the constant, add the offset of the
+ // previous elements.
+ if (i) {
+ if (auto *CS = dyn_cast<llvm::ConstantStruct>(Init)) {
+ llvm::StructType *CST = CS->getType();
+ const llvm::StructLayout *SL = DL.getStructLayout(CST);
+ OperandOffset += SL->getElementOffset(i - 1);
+ } else if (auto *CA = dyn_cast<llvm::ConstantArray>(Init)) {
+ llvm::ArrayType *CAT = CA->getType();
+ uint64_t ElementByteCount =
+ DL.getTypeAllocSize(CAT->getElementType());
+ OperandOffset += ElementByteCount * (i - 1);
+ }
+ }
+
+ Offset = OperandOffset;
+ }
+ }
}
+ return Offset;
}
/// Decide whether we should use bzero plus some stores to initialize a local
@@ -1205,12 +1252,13 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
}
auto *SizeVal = llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize);
-
// If the initializer is all or mostly the same, codegen with bzero / memset
// then do a few stores afterward.
if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
- auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
- SizeVal, isVolatile);
+ auto *I = Builder.CreateMemSet(
+ Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
+ llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize), isVolatile);
+
if (IsAutoInit)
I->addAnnotationMetadata("auto-init");
@@ -1218,8 +1266,25 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
constant->isNullValue() || isa<llvm::UndefValue>(constant);
if (!valueAlreadyCorrect) {
Loc = Loc.withElementType(Ty);
- emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder,
- IsAutoInit);
+ uint64_t Offset = emitStoresForInitAfterBZero(
+ CGM, constant, Loc, isVolatile, Builder, IsAutoInit);
+
+ // Prevent optimizing to odd offsets.
+ Offset -= Offset % Loc.getAlignment().getQuantity();
+ if (Offset && isa<llvm::ConstantArray>(constant)) {
+ if (Offset == ConstantSize) {
+ I->eraseFromParent();
+ } else {
+ Loc = Loc.withElementType(CGM.Int8Ty);
+
+ llvm::Value *AdjustedLoc = llvm::GetElementPtrInst::CreateInBounds(
+ CGM.Int8Ty, Loc.getPointer(),
+ {llvm::ConstantInt::get(CGM.IntPtrTy, Offset)}, "", I);
+ I->setOperand(0, AdjustedLoc);
+ I->setOperand(
+ 2, llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize - Offset));
+ }
+ }
}
return;
}
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003..7c01c750d1afe 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -35,7 +35,8 @@ void f(void) {
// CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8
// CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
- // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %A, i8 0, i32 144, i1 false)
+ // CHECK: %[[v0:.*]] = getelementptr inbounds i8, ptr %A, i32 8
+ // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %[[v0]], i8 0, i32 136, i1 false)
// CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], ptr %A, i32 0, i32 0
// CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], ptr %[[v2]], i32 0, i32 0
// CHECK: store float 1.000000e+00, ptr %[[v3]], align 4
More information about the cfe-commits
mailing list