[clang] [Clang][CodeGen] Do not set inbounds flag in `EmitMemberDataPointerAddress` when the base pointer is null (PR #130952)

Yingwei Zheng via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 9 21:19:47 PDT 2025


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/130952

>From 0f6ff605da3cbadc5311d4bf6c08fe98970a69c3 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 10 Apr 2025 09:54:33 +0800
Subject: [PATCH 1/5] [Clang][CodeGen] Do not set inbounds flag for struct GEP
 with null base pointers

---
 clang/docs/ReleaseNotes.rst                   |  7 ++
 clang/lib/CodeGen/CGExpr.cpp                  | 33 ++++++---
 clang/lib/CodeGen/CodeGenFunction.h           |  3 +-
 ...ptr-and-nonzero-offset-in-offsetof-idiom.c |  2 +-
 ...r-and-nonzero-offset-in-offsetof-idiom.cpp | 72 ++++++++++++++++++-
 5 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index cd16641c25ed8..c2f7a519d270a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -42,6 +42,13 @@ Potentially Breaking Changes
 C/C++ Language Potentially Breaking Changes
 -------------------------------------------
 
+- New LLVM optimizations have been implemented that optimize pointer arithmetic on
+  null pointers more aggressively.  As part of this, clang has implemented a special
+  case for old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))``, to
+  ensure they are not caught by these optimizations.  It is also possible to use
+  ``-fwrapv-pointer`` or   ``-fno-delete-null-pointer-checks`` to make pointer arithmetic
+  on null pointers well-defined. (#GH130734, #GH130742)
+
 C++ Specific Potentially Breaking Changes
 -----------------------------------------
 
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 451034395976f..c8ff2c880a655 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4785,6 +4785,16 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
   }
 
   Expr *BaseExpr = E->getBase();
+  bool IsInBounds = !getLangOpts().PointerOverflowDefined;
+  if (IsInBounds) {
+    // Check whether the underlying base pointer is a constant null.
+    // If so, we do not set inbounds flag for GEP to avoid breaking some
+    // old-style offsetof idioms.
+    Expr *UnderlyingBaseExpr = BaseExpr->IgnoreParens();
+    while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
+      UnderlyingBaseExpr = BaseMemberExpr->getBase()->IgnoreParens();
+    IsInBounds = !getContext().isSentinelNullExpr(UnderlyingBaseExpr);
+  }
   // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a scalar.
   LValue BaseLV;
   if (E->isArrow()) {
@@ -4806,7 +4816,7 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
 
   NamedDecl *ND = E->getMemberDecl();
   if (auto *Field = dyn_cast<FieldDecl>(ND)) {
-    LValue LV = EmitLValueForField(BaseLV, Field);
+    LValue LV = EmitLValueForField(BaseLV, Field, IsInBounds);
     setObjCGCLValueClass(getContext(), E, LV);
     if (getLangOpts().OpenMP) {
       // If the member was explicitly marked as nontemporal, mark it as
@@ -4892,12 +4902,15 @@ unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec,
 /// Get the address of a zero-sized field within a record. The resulting
 /// address doesn't necessarily have the right type.
 static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
-                                       const FieldDecl *Field) {
+                                       const FieldDecl *Field,
+                                       bool IsInBounds) {
   CharUnits Offset = CGF.getContext().toCharUnitsFromBits(
       CGF.getContext().getFieldOffset(Field));
   if (Offset.isZero())
     return Base;
   Base = Base.withElementType(CGF.Int8Ty);
+  if (!IsInBounds)
+    return CGF.Builder.CreateConstByteGEP(Base, Offset);
   return CGF.Builder.CreateConstInBoundsByteGEP(Base, Offset);
 }
 
@@ -4906,16 +4919,16 @@ static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
 ///
 /// The resulting address doesn't necessarily have the right type.
 static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
-                                      const FieldDecl *field) {
+                                      const FieldDecl *field, bool IsInBounds) {
   if (isEmptyFieldForLayout(CGF.getContext(), field))
-    return emitAddrOfZeroSizeField(CGF, base, field);
+    return emitAddrOfZeroSizeField(CGF, base, field, IsInBounds);
 
   const RecordDecl *rec = field->getParent();
 
   unsigned idx =
     CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
 
-  if (CGF.getLangOpts().PointerOverflowDefined)
+  if (!IsInBounds)
     return CGF.Builder.CreateConstGEP2_32(base, 0, idx, field->getName());
 
   return CGF.Builder.CreateStructGEP(base, idx, field->getName());
@@ -4953,8 +4966,8 @@ static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
   return false;
 }
 
-LValue CodeGenFunction::EmitLValueForField(LValue base,
-                                           const FieldDecl *field) {
+LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field,
+                                           bool IsInBounds) {
   LValueBaseInfo BaseInfo = base.getBaseInfo();
 
   if (field->isBitField()) {
@@ -5090,7 +5103,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
     if (!IsInPreservedAIRegion &&
         (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>()))
       // For structs, we GEP to the field that the record layout suggests.
-      addr = emitAddrOfFieldStorage(*this, addr, field);
+      addr = emitAddrOfFieldStorage(*this, addr, field, IsInBounds);
     else
       // Remember the original struct field index
       addr = emitPreserveStructAccess(*this, base, addr, field);
@@ -5134,7 +5147,9 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base,
   if (!FieldType->isReferenceType())
     return EmitLValueForField(Base, Field);
 
-  Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field);
+  Address V = emitAddrOfFieldStorage(
+      *this, Base.getAddress(), Field,
+      /*IsInBounds=*/!getLangOpts().PointerOverflowDefined);
 
   // Make sure that the address is pointing to the right type.
   llvm::Type *llvmType = ConvertTypeForMem(FieldType);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 2b1062d6d307c..27c464bc98a12 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4459,7 +4459,8 @@ class CodeGenFunction : public CodeGenTypeCache {
                               const ObjCIvarDecl *Ivar);
   llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface,
                                            const ObjCIvarDecl *Ivar);
-  LValue EmitLValueForField(LValue Base, const FieldDecl *Field);
+  LValue EmitLValueForField(LValue Base, const FieldDecl *Field,
+                            bool IsInBounds = false);
   LValue EmitLValueForLambdaField(const FieldDecl *Field);
   LValue EmitLValueForLambdaField(const FieldDecl *Field,
                                   llvm::Value *ThisValue);
diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
index 68c0ee3a3a885..46e22fbdb38ac 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
@@ -17,7 +17,7 @@ struct S {
 
 // CHECK-LABEL: @get_offset_of_y_naively(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively(void) {
   return ((uintptr_t)(&(((struct S *)0)->y)));
diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index 34d4f4c9e34eb..6a0d6265eff96 100644
--- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -10,12 +10,82 @@ struct S {
 
 // CHECK-LABEL: @_Z23get_offset_of_y_naivelyv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively() {
   return ((uintptr_t)(&(((S *)nullptr)->y)));
 }
 
+struct Empty {};
+
+struct T {
+  int a;
+  S s;
+  [[no_unique_address]] Empty e1;
+  int b;
+  [[no_unique_address]] Empty e2;
+};
+
+// CHECK-LABEL: @_Z30get_offset_of_y_naively_nestedv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_naively_nested() {
+  return ((uintptr_t)(&(((T *)nullptr)->s.y)));
+}
+
+// CHECK-LABEL: @_Z42get_offset_of_y_naively_nested_with_parensv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_naively_nested_with_parens() {
+  return ((uintptr_t)(&((((T *)nullptr)->s).y)));
+}
+
+// CHECK-LABEL: @_Z26get_offset_of_zero_storagev(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 16) to i64)
+//
+uintptr_t get_offset_of_zero_storage() {
+  return ((uintptr_t)(&(((T *)nullptr)->e2)));
+}
+
+namespace std { typedef decltype(__nullptr) nullptr_t; }
+// CHECK-LABEL: @_Z29get_offset_of_y_integral_zerov(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_integral_zero() {
+  return ((uintptr_t)(&(((S *)0)->y)));
+}
+
+// CHECK-LABEL: @_Z37get_offset_of_y_integral_zero_voidptrv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_integral_zero_voidptr() {
+  return ((uintptr_t)(&(((S *)(void*)0)->y)));
+}
+
+// CHECK-LABEL: @_Z25get_offset_of_y_nullptr_tv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_nullptr_t() {
+  return ((uintptr_t)(&(((S *)std::nullptr_t{})->y)));
+}
+
+// CHECK-LABEL: @_Z32get_offset_of_y_nullptr_constantv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[NULL:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr null, ptr [[NULL]], align 8
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+//
+uintptr_t get_offset_of_y_nullptr_constant() {
+  constexpr void *null = nullptr;
+  return ((uintptr_t)(&(((S *)null)->y)));
+}
+
 // CHECK-LABEL: @_Z27get_offset_of_y_via_builtinv(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i64 4

>From bbe2e97c5d89ec1004c32d3df094db1bbca8bfd4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 10 Apr 2025 11:08:30 +0800
Subject: [PATCH 2/5] [Clang][CodeGen] Fix test failures.

---
 clang/lib/CodeGen/CGExpr.cpp        | 2 +-
 clang/lib/CodeGen/CodeGenFunction.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c8ff2c880a655..82b7358839ac7 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4990,7 +4990,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field,
           (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) {
         if (Idx != 0) {
           // For structs, we GEP to the field that the record layout suggests.
-          if (getLangOpts().PointerOverflowDefined)
+          if (!IsInBounds)
             Addr = Builder.CreateConstGEP2_32(Addr, 0, Idx, field->getName());
           else
             Addr = Builder.CreateStructGEP(Addr, Idx, field->getName());
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 27c464bc98a12..c6cdb35df8677 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4460,7 +4460,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface,
                                            const ObjCIvarDecl *Ivar);
   LValue EmitLValueForField(LValue Base, const FieldDecl *Field,
-                            bool IsInBounds = false);
+                            bool IsInBounds = true);
   LValue EmitLValueForLambdaField(const FieldDecl *Field);
   LValue EmitLValueForLambdaField(const FieldDecl *Field,
                                   llvm::Value *ThisValue);

>From c287e15a9c88676d91da310916bac7f5b3787286 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 10 Apr 2025 11:30:18 +0800
Subject: [PATCH 3/5] [Clang][CodeGen] Add helper
 `isUnderlyingBasePointerConstantNull`

---
 clang/lib/CodeGen/CGExpr.cpp        | 22 ++++++++++++----------
 clang/lib/CodeGen/CodeGenFunction.h |  2 ++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 82b7358839ac7..df4912a2f5102 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4778,6 +4778,13 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
                                   Base.getBaseInfo(), TBAAAccessInfo());
 }
 
+bool CodeGenFunction::isUnderlyingBasePointerConstantNull(const Expr *E) {
+  const Expr *UnderlyingBaseExpr = E->IgnoreParens();
+  while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
+    UnderlyingBaseExpr = BaseMemberExpr->getBase()->IgnoreParens();
+  return getContext().isSentinelNullExpr(UnderlyingBaseExpr);
+}
+
 LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
   if (DeclRefExpr *DRE = tryToConvertMemberExprToDeclRefExpr(*this, E)) {
     EmitIgnoredExpr(E->getBase());
@@ -4785,16 +4792,11 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
   }
 
   Expr *BaseExpr = E->getBase();
-  bool IsInBounds = !getLangOpts().PointerOverflowDefined;
-  if (IsInBounds) {
-    // Check whether the underlying base pointer is a constant null.
-    // If so, we do not set inbounds flag for GEP to avoid breaking some
-    // old-style offsetof idioms.
-    Expr *UnderlyingBaseExpr = BaseExpr->IgnoreParens();
-    while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
-      UnderlyingBaseExpr = BaseMemberExpr->getBase()->IgnoreParens();
-    IsInBounds = !getContext().isSentinelNullExpr(UnderlyingBaseExpr);
-  }
+  // Check whether the underlying base pointer is a constant null.
+  // If so, we do not set inbounds flag for GEP to avoid breaking some
+  // old-style offsetof idioms.
+  bool IsInBounds = !getLangOpts().PointerOverflowDefined &&
+                    !isUnderlyingBasePointerConstantNull(BaseExpr);
   // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a scalar.
   LValue BaseLV;
   if (E->isArrow()) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index c6cdb35df8677..d601c155dc215 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4557,6 +4557,8 @@ class CodeGenFunction : public CodeGenTypeCache {
                                                const CXXRecordDecl *RD);
 
   bool isPointerKnownNonNull(const Expr *E);
+  /// Check whether the underlying base pointer is a constant null.
+  bool isUnderlyingBasePointerConstantNull(const Expr *E);
 
   /// Create the discriminator from the storage address and the entity hash.
   llvm::Value *EmitPointerAuthBlendDiscriminator(llvm::Value *StorageAddress,

>From d78261a69ab0beb29f1ec8d68c0f2507717bb020 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 12 Mar 2025 20:17:28 +0800
Subject: [PATCH 4/5] [Clang][CodeGen] Do not set inbounds in
 `EmitMemberDataPointerAddress` when the base pointer is null

---
 clang/lib/CodeGen/ItaniumCXXABI.cpp           | 11 +++++++---
 clang/lib/CodeGen/MicrosoftCXXABI.cpp         | 10 ++++++---
 .../microsoft-abi-member-pointers.cpp         | 21 +++++++++++++++++++
 .../CodeGenCXX/pointers-to-data-members.cpp   | 21 +++++++++++++++++++
 4 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 2822d526a54b0..400510d7056b3 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -872,9 +872,14 @@ llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(
 
   CGBuilderTy &Builder = CGF.Builder;
 
-  // Apply the offset, which we assume is non-null.
-  return Builder.CreateInBoundsGEP(CGF.Int8Ty, Base.emitRawPointer(CGF), MemPtr,
-                                   "memptr.offset");
+  // Apply the offset.
+  // Specially, we don't add inbounds flags if the base pointer is a constant
+  // null. This is a workaround for old-style container_of macros.
+  llvm::Value *BaseAddr = Base.emitRawPointer(CGF);
+  return Builder.CreateGEP(CGF.Int8Ty, BaseAddr, MemPtr, "memptr.offset",
+                           isa<llvm::ConstantPointerNull>(BaseAddr)
+                               ? llvm::GEPNoWrapFlags::none()
+                               : llvm::GEPNoWrapFlags::inBounds());
 }
 
 // See if it's possible to return a constant signed pointer.
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 7bef436302526..c32ca3be3405e 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -3269,9 +3269,13 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
     Addr = Base.emitRawPointer(CGF);
   }
 
-  // Apply the offset, which we assume is non-null.
-  return Builder.CreateInBoundsGEP(CGF.Int8Ty, Addr, FieldOffset,
-                                   "memptr.offset");
+  // Apply the offset.
+  // Specially, we don't add inbounds flags if the base pointer is a constant
+  // null. This is a workaround for old-style container_of macros.
+  return Builder.CreateGEP(CGF.Int8Ty, Addr, FieldOffset, "memptr.offset",
+                           isa<llvm::ConstantPointerNull>(Addr)
+                               ? llvm::GEPNoWrapFlags::none()
+                               : llvm::GEPNoWrapFlags::inBounds());
 }
 
 llvm::Value *
diff --git a/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp b/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
index fc8a31e0350e5..fe4cab164249f 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
@@ -964,3 +964,24 @@ static_assert(sizeof(b::d) == 16, "");
 static_assert(sizeof(void (a<int>::*)()) == 16, "");
 #endif
 }
+
+namespace ContainerOf {
+  using size_t = unsigned long long;
+
+  struct List {
+    int data;
+  };
+
+  struct Node {
+    int data;
+    struct List list1;
+    struct List list2;
+  };
+
+  // CHECK-LABEL: define{{.*}} ptr @"?getOwner at ContainerOf@@
+  // CHECK: %memptr.offset = getelementptr i8, ptr null, {{.*}}
+  Node* getOwner(List *list, List Node::*member) {
+    size_t offset = reinterpret_cast<size_t>(&((Node*)nullptr->*member));
+    return reinterpret_cast<Node*>(reinterpret_cast<char*>(list) - offset);
+  }
+}
diff --git a/clang/test/CodeGenCXX/pointers-to-data-members.cpp b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
index cf1d6c018409d..2ee6c65cf167d 100644
--- a/clang/test/CodeGenCXX/pointers-to-data-members.cpp
+++ b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
@@ -265,3 +265,24 @@ namespace PR47864 {
   struct D : B { int m; };
   auto x = (int B::*)&D::m;
 }
+
+namespace ContainerOf {
+  using size_t = unsigned long long;
+
+  struct List {
+    int data;
+  };
+
+  struct Node {
+    int data;
+    struct List list1;
+    struct List list2;
+  };
+
+  // CHECK-LABEL: define{{.*}} ptr @_ZN11ContainerOf8getOwnerEPNS_4ListEMNS_4NodeES0_
+  // CHECK: %memptr.offset = getelementptr i8, ptr null, i64 {{.*}}
+  Node* getOwner(List *list, List Node::*member) {
+    size_t offset = reinterpret_cast<size_t>(&((Node*)nullptr->*member));
+    return reinterpret_cast<Node*>(reinterpret_cast<char*>(list) - offset);
+  }
+}

>From 425801db3644127d5b5c276c8d5865aa4c2685ed Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 10 Apr 2025 12:19:15 +0800
Subject: [PATCH 5/5] [Clang][CodeGen] Address review comments.

---
 clang/lib/CodeGen/CGCXXABI.cpp        |  7 +++----
 clang/lib/CodeGen/CGCXXABI.h          |  2 +-
 clang/lib/CodeGen/CGClass.cpp         | 15 ++++++---------
 clang/lib/CodeGen/CGExpr.cpp          | 11 ++++++-----
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +-
 clang/lib/CodeGen/ItaniumCXXABI.cpp   | 18 +++++++-----------
 clang/lib/CodeGen/MicrosoftCXXABI.cpp | 17 +++++++----------
 7 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp
index 9f77fbec21380..93dc17bac95e0 100644
--- a/clang/lib/CodeGen/CGCXXABI.cpp
+++ b/clang/lib/CodeGen/CGCXXABI.cpp
@@ -60,10 +60,9 @@ CGCallee CGCXXABI::EmitLoadOfMemberFunctionPointer(
   return CGCallee::forDirect(FnPtr, FPT);
 }
 
-llvm::Value *
-CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
-                                       Address Base, llvm::Value *MemPtr,
-                                       const MemberPointerType *MPT) {
+llvm::Value *CGCXXABI::EmitMemberDataPointerAddress(
+    CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
+    const MemberPointerType *MPT, bool IsInBounds) {
   ErrorUnsupportedABI(CGF, "loads of member pointers");
   llvm::Type *Ty =
       llvm::PointerType::get(CGF.getLLVMContext(), Base.getAddressSpace());
diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h
index 148a7ba6df7e6..da2d367da7096 100644
--- a/clang/lib/CodeGen/CGCXXABI.h
+++ b/clang/lib/CodeGen/CGCXXABI.h
@@ -192,7 +192,7 @@ class CGCXXABI {
   virtual llvm::Value *
   EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
                                Address Base, llvm::Value *MemPtr,
-                               const MemberPointerType *MPT);
+                               const MemberPointerType *MPT, bool IsInBounds);
 
   /// Perform a derived-to-base, base-to-derived, or bitcast member
   /// pointer conversion.
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index c683dbb0af825..0f2422a6a665a 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -147,16 +147,13 @@ Address CodeGenFunction::LoadCXXThisAddress() {
 /// Emit the address of a field using a member data pointer.
 ///
 /// \param E Only used for emergency diagnostics
-Address
-CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
-                                                 llvm::Value *memberPtr,
-                                      const MemberPointerType *memberPtrType,
-                                                 LValueBaseInfo *BaseInfo,
-                                                 TBAAAccessInfo *TBAAInfo) {
+Address CodeGenFunction::EmitCXXMemberDataPointerAddress(
+    const Expr *E, Address base, llvm::Value *memberPtr,
+    const MemberPointerType *memberPtrType, bool IsInBounds,
+    LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) {
   // Ask the ABI to compute the actual address.
-  llvm::Value *ptr =
-    CGM.getCXXABI().EmitMemberDataPointerAddress(*this, E, base,
-                                                 memberPtr, memberPtrType);
+  llvm::Value *ptr = CGM.getCXXABI().EmitMemberDataPointerAddress(
+      *this, E, base, memberPtr, memberPtrType, IsInBounds);
 
   QualType memberType = memberPtrType->getPointeeType();
   CharUnits memberAlign =
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index df4912a2f5102..21a81e02a4f83 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -660,8 +660,8 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
 
     case SubobjectAdjustment::MemberPointerAdjustment: {
       llvm::Value *Ptr = EmitScalarExpr(Adjustment.Ptr.RHS);
-      Object = EmitCXXMemberDataPointerAddress(E, Object, Ptr,
-                                               Adjustment.Ptr.MPT);
+      Object = EmitCXXMemberDataPointerAddress(
+          E, Object, Ptr, Adjustment.Ptr.MPT, /*IsInBounds=*/true);
       break;
     }
     }
@@ -6301,9 +6301,10 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) {
 
   LValueBaseInfo BaseInfo;
   TBAAAccessInfo TBAAInfo;
-  Address MemberAddr =
-    EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, &BaseInfo,
-                                    &TBAAInfo);
+  bool IsInBounds = !getLangOpts().PointerOverflowDefined &&
+                    !isUnderlyingBasePointerConstantNull(E->getLHS());
+  Address MemberAddr = EmitCXXMemberDataPointerAddress(
+      E, BaseAddr, OffsetV, MPT, IsInBounds, &BaseInfo, &TBAAInfo);
 
   return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), BaseInfo, TBAAInfo);
 }
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index d601c155dc215..d675dd0186c03 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4640,7 +4640,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   // Compute the object pointer.
   Address EmitCXXMemberDataPointerAddress(
       const Expr *E, Address base, llvm::Value *memberPtr,
-      const MemberPointerType *memberPtrType,
+      const MemberPointerType *memberPtrType, bool IsInBounds,
       LValueBaseInfo *BaseInfo = nullptr, TBAAAccessInfo *TBAAInfo = nullptr);
   RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
                                       ReturnValueSlot ReturnValue,
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 400510d7056b3..35485dc6d867f 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -130,11 +130,10 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
                                     llvm::Value *MemFnPtr,
                                     const MemberPointerType *MPT) override;
 
-  llvm::Value *
-    EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
-                                 Address Base,
-                                 llvm::Value *MemPtr,
-                                 const MemberPointerType *MPT) override;
+  llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
+                                            Address Base, llvm::Value *MemPtr,
+                                            const MemberPointerType *MPT,
+                                            bool IsInBounds) override;
 
   llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
                                            const CastExpr *E,
@@ -867,19 +866,16 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
 /// base object.
 llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(
     CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
-    const MemberPointerType *MPT) {
+    const MemberPointerType *MPT, bool IsInBounds) {
   assert(MemPtr->getType() == CGM.PtrDiffTy);
 
   CGBuilderTy &Builder = CGF.Builder;
 
   // Apply the offset.
-  // Specially, we don't add inbounds flags if the base pointer is a constant
-  // null. This is a workaround for old-style container_of macros.
   llvm::Value *BaseAddr = Base.emitRawPointer(CGF);
   return Builder.CreateGEP(CGF.Int8Ty, BaseAddr, MemPtr, "memptr.offset",
-                           isa<llvm::ConstantPointerNull>(BaseAddr)
-                               ? llvm::GEPNoWrapFlags::none()
-                               : llvm::GEPNoWrapFlags::inBounds());
+                           IsInBounds ? llvm::GEPNoWrapFlags::inBounds()
+                                      : llvm::GEPNoWrapFlags::none());
 }
 
 // See if it's possible to return a constant signed pointer.
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index c32ca3be3405e..93ca8704de742 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -689,10 +689,10 @@ class MicrosoftCXXABI : public CGCXXABI {
                                           llvm::Value *MemPtr,
                                           const MemberPointerType *MPT) override;
 
-  llvm::Value *
-  EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
-                               Address Base, llvm::Value *MemPtr,
-                               const MemberPointerType *MPT) override;
+  llvm::Value *EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
+                                            Address Base, llvm::Value *MemPtr,
+                                            const MemberPointerType *MPT,
+                                            bool IsInBounds) override;
 
   llvm::Value *EmitNonNullMemberPointerConversion(
       const MemberPointerType *SrcTy, const MemberPointerType *DstTy,
@@ -3240,7 +3240,7 @@ llvm::Value *MicrosoftCXXABI::AdjustVirtualBase(
 
 llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
     CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
-    const MemberPointerType *MPT) {
+    const MemberPointerType *MPT, bool IsInBounds) {
   assert(MPT->isMemberDataPointer());
   CGBuilderTy &Builder = CGF.Builder;
   const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
@@ -3270,12 +3270,9 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
   }
 
   // Apply the offset.
-  // Specially, we don't add inbounds flags if the base pointer is a constant
-  // null. This is a workaround for old-style container_of macros.
   return Builder.CreateGEP(CGF.Int8Ty, Addr, FieldOffset, "memptr.offset",
-                           isa<llvm::ConstantPointerNull>(Addr)
-                               ? llvm::GEPNoWrapFlags::none()
-                               : llvm::GEPNoWrapFlags::inBounds());
+                           IsInBounds ? llvm::GEPNoWrapFlags::inBounds()
+                                      : llvm::GEPNoWrapFlags::none());
 }
 
 llvm::Value *



More information about the cfe-commits mailing list