[clang] [Clang][CodeGen] Do not set inbounds flag for struct GEP with null base pointers (PR #130734)
Yingwei Zheng via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 31 02:04:24 PDT 2025
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/130734
>From 173ba1595cef80e2affd3ed57feabb2feb0856a7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 11 Mar 2025 16:20:08 +0800
Subject: [PATCH 01/11] [Clang][CodeGen] Do not set inbounds flag for struct
GEP with null base pointers
---
clang/lib/CodeGen/CGBuilder.h | 15 ++++++++++-----
...nullptr-and-nonzero-offset-in-offsetof-idiom.c | 2 +-
...llptr-and-nonzero-offset-in-offsetof-idiom.cpp | 2 +-
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index b8036cf6e6a30..11e8818b33397 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -223,11 +223,16 @@ class CGBuilderTy : public CGBuilderBaseTy {
const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
- return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(),
- Index, Name),
- ElTy->getElementType(Index),
- Addr.getAlignment().alignmentAtOffset(Offset),
- Addr.isKnownNonNull());
+ // Specially, we don't add inbounds flags if the base pointer is null.
+ // This is a workaround for old-style offsetof macros.
+ llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
+ if (!isa<llvm::ConstantPointerNull>(Addr.getBasePointer()))
+ NWFlags |= llvm::GEPNoWrapFlags::inBounds();
+ return Address(
+ CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
+ Index, Name, NWFlags),
+ ElTy->getElementType(Index),
+ Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
}
/// Given
diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
index 68c0ee3a3a885..a7cfd77766712 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
@@ -17,7 +17,7 @@ struct S {
// CHECK-LABEL: @get_offset_of_y_naively(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
//
uintptr_t get_offset_of_y_naively(void) {
return ((uintptr_t)(&(((struct S *)0)->y)));
diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index 34d4f4c9e34eb..f00a2c486574c 100644
--- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -10,7 +10,7 @@ struct S {
// CHECK-LABEL: @_Z23get_offset_of_y_naivelyv(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
//
uintptr_t get_offset_of_y_naively() {
return ((uintptr_t)(&(((S *)nullptr)->y)));
>From fe733d081be378f713ab0169831cc63eec65a7dc Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 14 Mar 2025 14:28:55 +0800
Subject: [PATCH 02/11] [Clang][CodeGen] Address review comments.
---
clang/lib/CodeGen/CGBuilder.h | 15 +++++----------
clang/lib/CodeGen/CGExpr.cpp | 27 +++++++++++++++++++--------
clang/lib/CodeGen/CodeGenFunction.h | 3 ++-
3 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 11e8818b33397..b8036cf6e6a30 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -223,16 +223,11 @@ class CGBuilderTy : public CGBuilderBaseTy {
const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
- // Specially, we don't add inbounds flags if the base pointer is null.
- // This is a workaround for old-style offsetof macros.
- llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
- if (!isa<llvm::ConstantPointerNull>(Addr.getBasePointer()))
- NWFlags |= llvm::GEPNoWrapFlags::inBounds();
- return Address(
- CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
- Index, Name, NWFlags),
- ElTy->getElementType(Index),
- Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
+ return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(),
+ Index, Name),
+ ElTy->getElementType(Index),
+ Addr.getAlignment().alignmentAtOffset(Offset),
+ Addr.isKnownNonNull());
}
/// Given
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3d3a111f0514a..0b32d03e143c5 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4792,6 +4792,10 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
}
Expr *BaseExpr = E->getBase();
+ Expr *UnderlyingBaseExpr = BaseExpr;
+ while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
+ UnderlyingBaseExpr = BaseMemberExpr->getBase();
+ bool IsBaseConstantNull = getContext().isSentinelNullExpr(UnderlyingBaseExpr);
// If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar.
LValue BaseLV;
if (E->isArrow()) {
@@ -4813,7 +4817,7 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
NamedDecl *ND = E->getMemberDecl();
if (auto *Field = dyn_cast<FieldDecl>(ND)) {
- LValue LV = EmitLValueForField(BaseLV, Field);
+ LValue LV = EmitLValueForField(BaseLV, Field, IsBaseConstantNull);
setObjCGCLValueClass(getContext(), E, LV);
if (getLangOpts().OpenMP) {
// If the member was explicitly marked as nontemporal, mark it as
@@ -4899,12 +4903,15 @@ unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec,
/// Get the address of a zero-sized field within a record. The resulting
/// address doesn't necessarily have the right type.
static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
- const FieldDecl *Field) {
+ const FieldDecl *Field,
+ bool IsBaseConstantNull) {
CharUnits Offset = CGF.getContext().toCharUnitsFromBits(
CGF.getContext().getFieldOffset(Field));
if (Offset.isZero())
return Base;
Base = Base.withElementType(CGF.Int8Ty);
+ if (IsBaseConstantNull)
+ return CGF.Builder.CreateConstByteGEP(Base, Offset);
return CGF.Builder.CreateConstInBoundsByteGEP(Base, Offset);
}
@@ -4913,15 +4920,18 @@ static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
///
/// The resulting address doesn't necessarily have the right type.
static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
- const FieldDecl *field) {
+ const FieldDecl *field,
+ bool IsBaseConstantNull) {
if (isEmptyFieldForLayout(CGF.getContext(), field))
- return emitAddrOfZeroSizeField(CGF, base, field);
+ return emitAddrOfZeroSizeField(CGF, base, field, IsBaseConstantNull);
const RecordDecl *rec = field->getParent();
unsigned idx =
CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
+ if (IsBaseConstantNull)
+ return CGF.Builder.CreateConstGEP(base, idx, field->getName());
return CGF.Builder.CreateStructGEP(base, idx, field->getName());
}
@@ -4957,8 +4967,8 @@ static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
return false;
}
-LValue CodeGenFunction::EmitLValueForField(LValue base,
- const FieldDecl *field) {
+LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field,
+ bool IsBaseConstantNull) {
LValueBaseInfo BaseInfo = base.getBaseInfo();
if (field->isBitField()) {
@@ -5090,7 +5100,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
if (!IsInPreservedAIRegion &&
(!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>()))
// For structs, we GEP to the field that the record layout suggests.
- addr = emitAddrOfFieldStorage(*this, addr, field);
+ addr = emitAddrOfFieldStorage(*this, addr, field, IsBaseConstantNull);
else
// Remember the original struct field index
addr = emitPreserveStructAccess(*this, base, addr, field);
@@ -5134,7 +5144,8 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base,
if (!FieldType->isReferenceType())
return EmitLValueForField(Base, Field);
- Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field);
+ Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field,
+ /*IsBaseConstantNull=*/false);
// Make sure that the address is pointing to the right type.
llvm::Type *llvmType = ConvertTypeForMem(FieldType);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ca00a0e8c6cf4..46e0dde6a1090 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4472,7 +4472,8 @@ class CodeGenFunction : public CodeGenTypeCache {
const ObjCIvarDecl *Ivar);
llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar);
- LValue EmitLValueForField(LValue Base, const FieldDecl *Field);
+ LValue EmitLValueForField(LValue Base, const FieldDecl *Field,
+ bool IsBaseConstantNull = false);
LValue EmitLValueForLambdaField(const FieldDecl *Field);
LValue EmitLValueForLambdaField(const FieldDecl *Field,
llvm::Value *ThisValue);
>From 586bc15ed811805e159f2d18d436ccf42cb789c5 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 14 Mar 2025 15:41:22 +0800
Subject: [PATCH 03/11] [Clang][CodeGen] Add more tests.
---
clang/lib/CodeGen/CGBuilder.h | 18 ++++++++-----
clang/lib/CodeGen/CGExpr.cpp | 8 +++---
...r-and-nonzero-offset-in-offsetof-idiom.cpp | 26 +++++++++++++++++++
3 files changed, 43 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index b8036cf6e6a30..88ec55b322cf5 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -215,19 +215,25 @@ class CGBuilderTy : public CGBuilderBaseTy {
///
/// This API assumes that drilling into a struct like this is always an
/// inbounds and nuw operation.
+ /// Specifically, inbounds flag will not be set if \p IsBaseConstantNull is
+ /// true.
using CGBuilderBaseTy::CreateStructGEP;
Address CreateStructGEP(Address Addr, unsigned Index,
- const llvm::Twine &Name = "") {
+ const llvm::Twine &Name = "",
+ bool IsBaseConstantNull = false) {
llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType());
const llvm::DataLayout &DL = BB->getDataLayout();
const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
- return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(),
- Index, Name),
- ElTy->getElementType(Index),
- Addr.getAlignment().alignmentAtOffset(Offset),
- Addr.isKnownNonNull());
+ llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
+ if (!IsBaseConstantNull)
+ NWFlags |= llvm::GEPNoWrapFlags::inBounds();
+ return Address(
+ CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
+ Index, Name, NWFlags),
+ ElTy->getElementType(Index),
+ Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
}
/// Given
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 0b32d03e143c5..b10dbee783541 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4792,6 +4792,9 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
}
Expr *BaseExpr = E->getBase();
+ // Check whether the underlying base pointer is a constant null.
+ // If so, we do not set inbounds flag for GEP to avoid breaking some old-style
+ // offsetof idioms.
Expr *UnderlyingBaseExpr = BaseExpr;
while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
UnderlyingBaseExpr = BaseMemberExpr->getBase();
@@ -4930,9 +4933,8 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
unsigned idx =
CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
- if (IsBaseConstantNull)
- return CGF.Builder.CreateConstGEP(base, idx, field->getName());
- return CGF.Builder.CreateStructGEP(base, idx, field->getName());
+ return CGF.Builder.CreateStructGEP(base, idx, field->getName(),
+ IsBaseConstantNull);
}
static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base,
diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index f00a2c486574c..ac45d2e0da4fc 100644
--- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -16,6 +16,32 @@ uintptr_t get_offset_of_y_naively() {
return ((uintptr_t)(&(((S *)nullptr)->y)));
}
+struct Empty {};
+
+struct T {
+ int a;
+ S s;
+ [[no_unique_address]] Empty e1;
+ int b;
+ [[no_unique_address]] Empty e2;
+};
+
+// CHECK-LABEL: @_Z30get_offset_of_y_naively_nestedv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_naively_nested() {
+ return ((uintptr_t)(&(((T *)nullptr)->s.y)));
+}
+
+// CHECK-LABEL: @_Z26get_offset_of_zero_storagev(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 16) to i64)
+//
+uintptr_t get_offset_of_zero_storage() {
+ return ((uintptr_t)(&(((T *)nullptr)->e2)));
+}
+
// CHECK-LABEL: @_Z27get_offset_of_y_via_builtinv(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i64 4
>From b11610211414fc4aa615a669bf41872ef34f85ab Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 20 Mar 2025 17:16:26 +0800
Subject: [PATCH 04/11] [Clang][CodeGen] Handle parens
---
clang/lib/CodeGen/CGExpr.cpp | 4 ++--
...catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp | 8 ++++++++
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index b10dbee783541..6b5c4e7291031 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4795,9 +4795,9 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
// Check whether the underlying base pointer is a constant null.
// If so, we do not set inbounds flag for GEP to avoid breaking some old-style
// offsetof idioms.
- Expr *UnderlyingBaseExpr = BaseExpr;
+ Expr *UnderlyingBaseExpr = BaseExpr->IgnoreParens();
while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
- UnderlyingBaseExpr = BaseMemberExpr->getBase();
+ UnderlyingBaseExpr = BaseMemberExpr->getBase()->IgnoreParens();
bool IsBaseConstantNull = getContext().isSentinelNullExpr(UnderlyingBaseExpr);
// If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar.
LValue BaseLV;
diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index ac45d2e0da4fc..2df1df53a3448 100644
--- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -34,6 +34,14 @@ uintptr_t get_offset_of_y_naively_nested() {
return ((uintptr_t)(&(((T *)nullptr)->s.y)));
}
+// CHECK-LABEL: @_Z42get_offset_of_y_naively_nested_with_parensv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_naively_nested_with_parens() {
+ return ((uintptr_t)(&((((T *)nullptr)->s).y)));
+}
+
// CHECK-LABEL: @_Z26get_offset_of_zero_storagev(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 16) to i64)
>From 78f88f09c57aecda41a9f910ec2fb4462ad8313e Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 20 Mar 2025 17:57:59 +0800
Subject: [PATCH 05/11] [Clang][CodeGen] Use `createConstGEP2_32`
---
clang/lib/CodeGen/CGBuilder.h | 42 ++++++++++---------
clang/lib/CodeGen/CGExpr.cpp | 5 ++-
...ptr-and-nonzero-offset-in-offsetof-idiom.c | 2 +-
...r-and-nonzero-offset-in-offsetof-idiom.cpp | 6 +--
4 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 88ec55b322cf5..3a133447c62e1 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -64,21 +64,27 @@ class CGBuilderTy : public CGBuilderBaseTy {
Address createConstGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1,
const llvm::Twine &Name) {
const llvm::DataLayout &DL = BB->getDataLayout();
- llvm::GetElementPtrInst *GEP;
+ llvm::Value *V;
if (IsInBounds)
- GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32(
- Addr.getElementType(), emitRawPointerFromAddress(Addr), Idx0, Idx1,
- Name));
+ V = CreateConstInBoundsGEP2_32(Addr.getElementType(),
+ emitRawPointerFromAddress(Addr), Idx0,
+ Idx1, Name);
else
- GEP = cast<llvm::GetElementPtrInst>(CreateConstGEP2_32(
- Addr.getElementType(), emitRawPointerFromAddress(Addr), Idx0, Idx1,
- Name));
+ V = CreateConstGEP2_32(Addr.getElementType(),
+ emitRawPointerFromAddress(Addr), Idx0, Idx1, Name);
llvm::APInt Offset(
DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0,
/*isSigned=*/true);
- if (!GEP->accumulateConstantOffset(DL, Offset))
- llvm_unreachable("offset of GEP with constants is always computable");
- return Address(GEP, GEP->getResultElementType(),
+ llvm::Type *ElementTy = nullptr;
+ if (auto *GEP = dyn_cast<llvm::GEPOperator>(V)) {
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ llvm_unreachable("offset of GEP with constants is always computable");
+ ElementTy = GEP->getResultElementType();
+ } else {
+ ElementTy = llvm::GetElementPtrInst::getIndexedType(Addr.getElementType(),
+ {Idx0, Idx1});
+ }
+ return Address(V, ElementTy,
Addr.getAlignment().alignmentAtOffset(
CharUnits::fromQuantity(Offset.getSExtValue())),
IsInBounds ? Addr.isKnownNonNull() : NotKnownNonNull);
@@ -219,21 +225,17 @@ class CGBuilderTy : public CGBuilderBaseTy {
/// true.
using CGBuilderBaseTy::CreateStructGEP;
Address CreateStructGEP(Address Addr, unsigned Index,
- const llvm::Twine &Name = "",
- bool IsBaseConstantNull = false) {
+ const llvm::Twine &Name = "") {
llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType());
const llvm::DataLayout &DL = BB->getDataLayout();
const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
- llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
- if (!IsBaseConstantNull)
- NWFlags |= llvm::GEPNoWrapFlags::inBounds();
- return Address(
- CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
- Index, Name, NWFlags),
- ElTy->getElementType(Index),
- Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
+ return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(),
+ Index, Name),
+ ElTy->getElementType(Index),
+ Addr.getAlignment().alignmentAtOffset(Offset),
+ Addr.isKnownNonNull());
}
/// Given
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6b5c4e7291031..2b12cecaf3b43 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4933,8 +4933,9 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
unsigned idx =
CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
- return CGF.Builder.CreateStructGEP(base, idx, field->getName(),
- IsBaseConstantNull);
+ if (IsBaseConstantNull)
+ return CGF.Builder.CreateConstGEP2_32(base, 0, idx, field->getName());
+ return CGF.Builder.CreateStructGEP(base, idx, field->getName());
}
static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base,
diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
index a7cfd77766712..46e22fbdb38ac 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
@@ -17,7 +17,7 @@ struct S {
// CHECK-LABEL: @get_offset_of_y_naively(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
//
uintptr_t get_offset_of_y_naively(void) {
return ((uintptr_t)(&(((struct S *)0)->y)));
diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index 2df1df53a3448..7fa81e0986b21 100644
--- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -10,7 +10,7 @@ struct S {
// CHECK-LABEL: @_Z23get_offset_of_y_naivelyv(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
//
uintptr_t get_offset_of_y_naively() {
return ((uintptr_t)(&(((S *)nullptr)->y)));
@@ -28,7 +28,7 @@ struct T {
// CHECK-LABEL: @_Z30get_offset_of_y_naively_nestedv(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
//
uintptr_t get_offset_of_y_naively_nested() {
return ((uintptr_t)(&(((T *)nullptr)->s.y)));
@@ -36,7 +36,7 @@ uintptr_t get_offset_of_y_naively_nested() {
// CHECK-LABEL: @_Z42get_offset_of_y_naively_nested_with_parensv(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
//
uintptr_t get_offset_of_y_naively_nested_with_parens() {
return ((uintptr_t)(&((((T *)nullptr)->s).y)));
>From a5e4bc66c00a63fb354e99c3d15d0cae8eb2b8dc Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 20 Mar 2025 18:11:06 +0800
Subject: [PATCH 06/11] [Clang][CodeGen] Add release note.
---
clang/docs/ReleaseNotes.rst | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e409f206f6eae..536b477ed02a3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -42,6 +42,11 @@ Potentially Breaking Changes
C/C++ Language Potentially Breaking Changes
-------------------------------------------
+- Some old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))`` are treated
+ as UB. To avoid breaking existing code, ``inbounds`` flags will not be set for such patterns.
+ However, it is still highly recommended to use the UB-free builtin ``__builtin_offsetof``.
+ (#GH130734)
+
C++ Specific Potentially Breaking Changes
-----------------------------------------
>From 7807c12628b8687f2cb4db42d58157b565122688 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 20 Mar 2025 18:20:40 +0800
Subject: [PATCH 07/11] [Clang][CodeGen] Remove comment.
---
clang/lib/CodeGen/CGBuilder.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 3a133447c62e1..c5e25d9f8cae5 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -221,8 +221,6 @@ class CGBuilderTy : public CGBuilderBaseTy {
///
/// This API assumes that drilling into a struct like this is always an
/// inbounds and nuw operation.
- /// Specifically, inbounds flag will not be set if \p IsBaseConstantNull is
- /// true.
using CGBuilderBaseTy::CreateStructGEP;
Address CreateStructGEP(Address Addr, unsigned Index,
const llvm::Twine &Name = "") {
>From 668cabe2b05ac336ab7c4f14c7878e88337d4071 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 21 Mar 2025 21:54:16 +0800
Subject: [PATCH 08/11] [Clang][CodeGen] Add more tests. NFC.
---
...r-and-nonzero-offset-in-offsetof-idiom.cpp | 36 +++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index 7fa81e0986b21..6a0d6265eff96 100644
--- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -50,6 +50,42 @@ uintptr_t get_offset_of_zero_storage() {
return ((uintptr_t)(&(((T *)nullptr)->e2)));
}
+namespace std { typedef decltype(__nullptr) nullptr_t; }
+// CHECK-LABEL: @_Z29get_offset_of_y_integral_zerov(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_integral_zero() {
+ return ((uintptr_t)(&(((S *)0)->y)));
+}
+
+// CHECK-LABEL: @_Z37get_offset_of_y_integral_zero_voidptrv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_integral_zero_voidptr() {
+ return ((uintptr_t)(&(((S *)(void*)0)->y)));
+}
+
+// CHECK-LABEL: @_Z25get_offset_of_y_nullptr_tv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_nullptr_t() {
+ return ((uintptr_t)(&(((S *)std::nullptr_t{})->y)));
+}
+
+// CHECK-LABEL: @_Z32get_offset_of_y_nullptr_constantv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[NULL:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr null, ptr [[NULL]], align 8
+// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_nullptr_constant() {
+ constexpr void *null = nullptr;
+ return ((uintptr_t)(&(((S *)null)->y)));
+}
+
// CHECK-LABEL: @_Z27get_offset_of_y_via_builtinv(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i64 4
>From 32cff3eae16eb5c4be42e830823ef8d754fd4c61 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 22 Mar 2025 16:35:38 +0800
Subject: [PATCH 09/11] [Clang] Update release notes. NFC.
---
clang/docs/ReleaseNotes.rst | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 536b477ed02a3..90f435c9f1e9b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -44,8 +44,9 @@ C/C++ Language Potentially Breaking Changes
- Some old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))`` are treated
as UB. To avoid breaking existing code, ``inbounds`` flags will not be set for such patterns.
- However, it is still highly recommended to use the UB-free builtin ``__builtin_offsetof``.
- (#GH130734)
+ However, it is still highly recommended to use the UB-free macro ``offsetof`` or clang builtin
+ function ``__builtin_offsetof``. It is also possible to use ``-fwrapv-pointer`` or
+ ``-fno-delete-null-pointer-checks`` to make this behavior well-defined. (#GH130734, #GH130742)
C++ Specific Potentially Breaking Changes
-----------------------------------------
>From b8a45206155aabff8f52b11fb5fd570fc010fa0b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 31 Mar 2025 16:31:15 +0800
Subject: [PATCH 10/11] [Clang][Docs] Clarify release notes. NFC.
---
clang/docs/ReleaseNotes.rst | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 90f435c9f1e9b..d67183aa82637 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -42,11 +42,12 @@ Potentially Breaking Changes
C/C++ Language Potentially Breaking Changes
-------------------------------------------
-- Some old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))`` are treated
- as UB. To avoid breaking existing code, ``inbounds`` flags will not be set for such patterns.
- However, it is still highly recommended to use the UB-free macro ``offsetof`` or clang builtin
- function ``__builtin_offsetof``. It is also possible to use ``-fwrapv-pointer`` or
- ``-fno-delete-null-pointer-checks`` to make this behavior well-defined. (#GH130734, #GH130742)
+- New LLVM optimizations have been implemented that optimize pointer arithmetic on
+ null pointers more aggressively. As part of this, clang has implemented a special
+ case for old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))``, to
+ ensure they are not caught by these optimizations. It is also possible to use
+ ``-fwrapv-pointer`` or ``-fno-delete-null-pointer-checks`` to make pointer arithmetic
+ on null pointers well-defined. (#GH130734, #GH130742)
C++ Specific Potentially Breaking Changes
-----------------------------------------
>From ad576ca9cf47df080e6211d09ab96c7fa21ec746 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 31 Mar 2025 17:03:52 +0800
Subject: [PATCH 11/11] [Clang] Do not use the GEP result to infer offset
---
clang/lib/CodeGen/CGBuilder.h | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index c5e25d9f8cae5..090f75d3b5d3c 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -75,15 +75,13 @@ class CGBuilderTy : public CGBuilderBaseTy {
llvm::APInt Offset(
DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0,
/*isSigned=*/true);
- llvm::Type *ElementTy = nullptr;
- if (auto *GEP = dyn_cast<llvm::GEPOperator>(V)) {
- if (!GEP->accumulateConstantOffset(DL, Offset))
- llvm_unreachable("offset of GEP with constants is always computable");
- ElementTy = GEP->getResultElementType();
- } else {
- ElementTy = llvm::GetElementPtrInst::getIndexedType(Addr.getElementType(),
- {Idx0, Idx1});
- }
+ if (!llvm::GEPOperator::accumulateConstantOffset(
+ Addr.getElementType(), {getInt32(Idx0), getInt32(Idx1)}, DL,
+ Offset))
+ llvm_unreachable(
+ "accumulateConstantOffset with constant indices should not fail.");
+ llvm::Type *ElementTy = llvm::GetElementPtrInst::getIndexedType(
+ Addr.getElementType(), {Idx0, Idx1});
return Address(V, ElementTy,
Addr.getAlignment().alignmentAtOffset(
CharUnits::fromQuantity(Offset.getSExtValue())),
More information about the cfe-commits
mailing list