[clang] DRAFT - same changes as #194989, testing fresh build and clang format (PR #203459)

Helena Kotas via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 11 22:15:15 PDT 2026


https://github.com/hekota created https://github.com/llvm/llvm-project/pull/203459

None

>From 9933d7acf5e34762631eb73d3de902fb17b45e0f Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Wed, 29 Apr 2026 15:49:22 -0700
Subject: [PATCH 01/21] [HLSL] Disable implicit constructors for user-defined
 structs/classes

---
 clang/include/clang/AST/DeclCXX.h             |  66 ++++++--
 clang/lib/AST/Expr.cpp                        |   3 +-
 clang/lib/CodeGen/CGDecl.cpp                  |   8 +-
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  13 +-
 clang/lib/Sema/SemaExpr.cpp                   |  28 +++-
 clang/lib/Sema/SemaHLSL.cpp                   |  21 ++-
 clang/lib/Sema/SemaInit.cpp                   |  12 +-
 clang/lib/Sema/SemaOverload.cpp               |  14 +-
 clang/test/AST/HLSL/cbuffer.hlsl              |   2 +-
 clang/test/AST/HLSL/matrix-constructors.hlsl  |   2 +
 .../HLSL/semantic-output-struct-shadow.hlsl   |   4 +-
 .../test/AST/HLSL/semantic-output-struct.hlsl |   4 +-
 .../BasicFeatures/ArrayElementwiseCast.hlsl   |   4 +-
 .../CodeGenHLSL/BasicFeatures/InitLists.hlsl  | 156 +++++++++++++-----
 .../BasicFeatures/MatrixElementTypeCast.hlsl  |   6 +-
 .../BasicFeatures/StructElementwiseCast.hlsl  |  10 +-
 .../BasicFeatures/VectorElementwiseCast.hlsl  |  10 +-
 .../CodeGenHLSL/builtins/hlsl_resource_t.hlsl |   8 +-
 .../StructuredBuffers-subscripts.hlsl         |  16 +-
 .../resources/cbuffer_struct_passing.hlsl     | 122 ++++++++++----
 .../resources/resources-in-structs.hlsl       |   2 +-
 .../semantics/semantic-struct-2-output.hlsl   |   4 +-
 .../CodeGenHLSL/this-assignment-overload.hlsl |  52 +++---
 clang/test/CodeGenHLSL/this-assignment.hlsl   |  47 ++++--
 .../BuiltIns/WaveActiveAllTrue-errors.hlsl    |   2 +-
 .../BuiltIns/WaveActiveAnyTrue-errors.hlsl    |   2 +-
 .../BuiltIns/WaveActiveBallot-errors.hlsl     |   2 +-
 .../BuiltIns/WaveActiveCountBits-errors.hlsl  |   2 +-
 .../BuiltIns/dot4add_i8packed-errors.hlsl     |   2 +-
 .../BuiltIns/dot4add_u8packed-errors.hlsl     |   2 +-
 .../Language/AggregateSplatCast-errors.hlsl   |   4 +-
 .../Language/ElementwiseCast-errors.hlsl      |   7 +-
 .../SemaHLSL/Language/ElementwiseCasts.hlsl   |   1 +
 clang/test/SemaHLSL/Language/InitListAST.hlsl |   6 +
 clang/test/SemaHLSL/Language/InitLists.hlsl   |  10 +-
 clang/test/SemaHLSL/prohibit_pointer.hlsl     |   2 +-
 36 files changed, 455 insertions(+), 201 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 2af396f025c93..fe2bc0dd628c3 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -759,19 +759,40 @@ class CXXRecordDecl : public RecordDecl {
            needsImplicitDefaultConstructor();
   }
 
+  // Used by HLSL to determine if a record is a built-in implicit HLSL
+  // struct/class or a user-defined one. User-defined HLSL records cannot
+  // have ctors, dtors, or overloaded operators, while implicit built-in
+  // HLSL records such as resource classes can. It would be nice to use the
+  // isImplicit() methods to determine that, but this flag is not propagated
+  // to template-instantiated classes.
+  //
+  /// Determines whether this class has any user provided special members.
+  bool hasUserProvidedSpecialMembers() const {
+    return data().UserDeclaredSpecialMembers &
+               (SMF_MoveConstructor | SMF_MoveAssignment | SMF_Destructor |
+                SMF_CopyAssignment | SMF_CopyConstructor) ||
+           data().UserDeclaredConstructor ||
+           data().UserProvidedDefaultConstructor;
+  }
+
   /// Determine if we need to declare a default constructor for
   /// this class.
   ///
   /// This value is used for lazy creation of default constructors.
   bool needsImplicitDefaultConstructor() const {
-    return (!data().UserDeclaredConstructor &&
-            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
-            (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
-           // FIXME: Proposed fix to core wording issue: if a class inherits
-           // a default constructor and doesn't explicitly declare one, one
-           // is declared implicitly.
-           (data().HasInheritedDefaultConstructor &&
-            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor));
+    return ((!data().UserDeclaredConstructor &&
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
+             (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
+            // FIXME: Proposed fix to core wording issue: if a class inherits
+            // a default constructor and doesn't explicitly declare one, one
+            // is declared implicitly.
+            (data().HasInheritedDefaultConstructor &&
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
+           // In HLSL, only built-in records like resource classes can have
+           // constructors.
+           (!getLangOpts().HLSL ||
+            (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether this class has any user-declared constructors.
@@ -797,7 +818,11 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// constructor to be lazily declared.
   bool needsImplicitCopyConstructor() const {
-    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor);
+    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor) &&
+           // In HLSL, only built-in records like resource classes can have
+           // constructors.
+           (!getLangOpts().HLSL || isLambda() ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -893,8 +918,11 @@ class CXXRecordDecl : public RecordDecl {
     return !(data().DeclaredSpecialMembers & SMF_MoveConstructor) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveAssignment() &&
-           !hasUserDeclaredDestructor();
+           !hasUserDeclaredMoveAssignment() && !hasUserDeclaredDestructor() &&
+           // In HLSL, only built-in records like resource classes can have
+           // constructors.
+           (!getLangOpts().HLSL || isLambda() ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a defaulted move
@@ -923,7 +951,11 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// assignment operator to be lazily declared.
   bool needsImplicitCopyAssignment() const {
-    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment);
+    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment) &&
+           // In HLSL, only built-in records like resource classes can have
+           // overloaded assignment operators.
+           (!getLangOpts().HLSL || isLambda() ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -984,9 +1016,13 @@ class CXXRecordDecl : public RecordDecl {
     return !(data().DeclaredSpecialMembers & SMF_MoveAssignment) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveConstructor() &&
-           !hasUserDeclaredDestructor() &&
-           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable());
+           !hasUserDeclaredMoveConstructor() && !hasUserDeclaredDestructor() &&
+           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable()) &&
+           // In HLSL, only built-in records like resource classes can have
+           // overloaded assignment operators.
+           (!getLangOpts().HLSL ||
+            (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a move assignment
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 64d61dbc3d128..9824367664a1e 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2082,7 +2082,8 @@ ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T,
   // Per C++ [conv.lval]p3, lvalue-to-rvalue conversions on class and
   // std::nullptr_t have special semantics not captured by CK_LValueToRValue.
   assert((Kind != CK_LValueToRValue ||
-          !(T->isNullPtrType() || T->getAsCXXRecordDecl())) &&
+          !(T->isNullPtrType() ||
+            (T->getAsCXXRecordDecl() && !C.getLangOpts().HLSL))) &&
          "invalid type for lvalue-to-rvalue conversion");
   ImplicitCastExpr *E =
       new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, FPO, VK);
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 419b3c477e7b2..28a14072982e8 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1525,10 +1525,14 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
     // isConstantInitializer produces wrong answers for structs with
     // reference or bitfield members, and a few other cases, and checking
     // for POD-ness protects us from some of these.
+    QualType BaseTy = getContext().getBaseElementType(Ty);
     if (D.getInit() && (Ty->isArrayType() || Ty->isRecordType()) &&
         (D.isConstexpr() ||
-         ((Ty.isPODType(getContext()) ||
-           getContext().getBaseElementType(Ty)->isObjCObjectPointerType()) &&
+         ((Ty.isPODType(getContext()) || BaseTy->isObjCObjectPointerType() ||
+           // If HLSL, check if it's a constant initializer anyway because
+           // POD-ness will no longer be true for user defined structs
+           // (since they cannot have constructors or a destructor).
+           (getLangOpts().HLSL && BaseTy->isRecordType())) &&
           D.getInit()->isConstantInitializer(getContext())))) {
 
       // If the variable's a const type, and it's neither an NRVO
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index a134f6aab9490..575592dc61fec 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -1079,8 +1079,10 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
             PD->getAttr<HLSLParamModifierAttr>()) {
       llvm_unreachable("Not handled yet");
     } else {
-      llvm::Type *ParamType =
-          Param.hasByValAttr() ? Param.getParamByValType() : Param.getType();
+      llvm::Type *ParamType = Param.hasByValAttr() ? Param.getParamByValType()
+                              : PD->getType()->isRecordType()
+                                  ? CGM.getTypes().ConvertType(PD->getType())
+                                  : Param.getType();
       auto AttrBegin = PD->specific_attr_begin<HLSLAppliedSemanticAttr>();
       auto AttrEnd = PD->specific_attr_end<HLSLAppliedSemanticAttr>();
       auto Result =
@@ -1088,12 +1090,11 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
       SemanticValue = Result.first;
       if (!SemanticValue)
         return;
-      if (Param.hasByValAttr()) {
+      if (Param.hasByValAttr() || PD->getType()->isRecordType()) {
         llvm::Value *Var =
             CGM.getLangOpts().EmitLogicalPointer
-                ? cast<Instruction>(
-                      B.CreateStructuredAlloca(Param.getParamByValType()))
-                : cast<Instruction>(B.CreateAlloca(Param.getParamByValType()));
+                ? cast<Instruction>(B.CreateStructuredAlloca(ParamType))
+                : cast<Instruction>(B.CreateAlloca(ParamType));
         B.CreateStore(SemanticValue, Var);
         SemanticValue = Var;
       }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index c494669420282..cba1f4e9fc6ab 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -664,8 +664,9 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) {
 
   // We don't want to throw lvalue-to-rvalue casts on top of
   // expressions of certain types in C++.
+  // In HLSL, lvalue-to-rvalue conversion is allowed on records.
   if (getLangOpts().CPlusPlus) {
-    if (T == Context.OverloadTy || T->isRecordType() ||
+    if (T == Context.OverloadTy || (T->isRecordType() && !getLangOpts().HLSL) ||
         (T->isDependentType() && !T->isAnyPointerType() &&
          !T->isMemberPointerType()))
       return E;
@@ -16005,6 +16006,9 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   if (!LHSExpr || !RHSExpr)
     return ExprError();
 
+  const Type *LHSTy = LHSExpr->getType().getTypePtr();
+  const Type *RHSTy = RHSExpr->getType().getTypePtr();
+
   // We want to end up calling one of SemaPseudoObject::checkAssignment
   // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if
   // both expressions are overloadable or either is type-dependent),
@@ -16012,7 +16016,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   // any placeholder types out of the way.
 
   // Handle pseudo-objects in the LHS.
-  if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) {
+  if (const BuiltinType *pty = LHSTy->getAsPlaceholderType()) {
     // Assignments with a pseudo-object l-value need special analysis.
     if (pty->getKind() == BuiltinType::PseudoObject &&
         BinaryOperator::isAssignmentOp(Opc))
@@ -16068,7 +16072,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
     if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) {
       if (getLangOpts().CPlusPlus &&
           (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
-           LHSExpr->getType()->isOverloadableType()))
+           LHSTy->isOverloadableType()))
         return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
 
       return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr,
@@ -16077,7 +16081,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
 
     // Don't resolve overloads if the other type is overloadable.
     if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload &&
-        LHSExpr->getType()->isOverloadableType())
+        LHSTy->isOverloadableType())
       return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
 
     ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr);
@@ -16085,8 +16089,8 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
     RHSExpr = resolvedRHS.get();
   }
 
-  if (getLangOpts().HLSL && (LHSExpr->getType()->isHLSLResourceRecord() ||
-                             LHSExpr->getType()->isHLSLResourceRecordArray())) {
+  if (getLangOpts().HLSL &&
+      (LHSTy->isHLSLResourceRecord() || LHSTy->isHLSLResourceRecordArray())) {
     if (!HLSL().CheckResourceBinOp(Opc, LHSExpr, RHSExpr, OpLoc))
       return ExprError();
   }
@@ -16094,9 +16098,17 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   if (getLangOpts().CPlusPlus) {
     // Otherwise, build an overloaded op if either expression is type-dependent
     // or has an overloadable type.
+    // In HLSL, user-defined structs/classes do not have ctors, dtors or
+    // overloadable operators.
+    bool IsLHSNonOverloadableHLSLType =
+        getLangOpts().HLSL && LHSTy->isRecordType() &&
+        !LHSTy->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers();
+    bool IsRHSNonOverloadableHLSLType =
+        getLangOpts().HLSL && RHSTy->isRecordType() &&
+        !RHSTy->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers();
     if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
-        LHSExpr->getType()->isOverloadableType() ||
-        RHSExpr->getType()->isOverloadableType())
+        (LHSTy->isOverloadableType() && !IsLHSNonOverloadableHLSLType) ||
+        (RHSTy->isOverloadableType() && !IsRHSNonOverloadableHLSLType))
       return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
   }
 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index aba1c5072a5fc..688bb2e39cea8 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -5554,14 +5554,25 @@ bool SemaHLSL::ActOnUninitializedVarDecl(VarDecl *VD) {
   if (VD->getType().getAddressSpace() == LangAS::hlsl_constant)
     return true;
 
-  // Initialize non-static resources at the global scope.
   if (VD->hasGlobalStorage() && VD->getStorageClass() != SC_Static) {
     const Type *Ty = VD->getType().getTypePtr();
-    if (Ty->isHLSLResourceRecord())
-      return initGlobalResourceDecl(VD);
-    if (Ty->isHLSLResourceRecordArray())
-      return initGlobalResourceArrayDecl(VD);
+    if (Ty->isHLSLResourceRecord() && initGlobalResourceDecl(VD))
+      return true;
+    if (Ty->isHLSLResourceRecordArray() && initGlobalResourceArrayDecl(VD))
+      return true;
   }
+
+  // User-defined structs/classes do not have constructors.
+  // When declared at global scope, they are part of the constant buffer
+  // and should not be initialized by the compiler.
+  // When declared at a local scope, they are not initialized by default.
+  // Also applies to arrays of user-defined structs/classes.
+  const Type *Ty = VD->getType()->getUnqualifiedDesugaredType();
+  while (Ty->isArrayType())
+    Ty = Ty->getArrayElementTypeNoTypeQual()->getUnqualifiedDesugaredType();
+  if (CXXRecordDecl *RD = Ty->getAsCXXRecordDecl())
+    return !RD->hasUserProvidedSpecialMembers();
+
   return false;
 }
 
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index e54a25405c816..2728973d38ffc 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -6927,7 +6927,11 @@ void InitializationSequence::InitializeFrom(Sema &S,
   assert(S.getLangOpts().CPlusPlus);
 
   //     - If the destination type is a (possibly cv-qualified) class type:
-  if (DestType->isRecordType()) {
+  //       (except for HLSL, where user-defined record types do not have
+  //        constructors or conversion functions)
+  if (DestType->isRecordType() &&
+      (!S.getLangOpts().HLSL ||
+       DestType->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers())) {
     //     - If the initialization is direct-initialization, or if it is
     //       copy-initialization where the cv-unqualified version of the
     //       source type is the same class as, or a derived class of, the
@@ -7012,7 +7016,11 @@ void InitializationSequence::InitializeFrom(Sema &S,
 
   //    - Otherwise, if the source type is a (possibly cv-qualified) class
   //      type, conversion functions are considered.
-  if (!SourceType.isNull() && SourceType->isRecordType()) {
+  //      (except for HLSL, where user-defined record types do not have
+  //      constructors or conversion functions).
+  if (!SourceType.isNull() && SourceType->isRecordType() &&
+      (!S.getLangOpts().HLSL ||
+       SourceType->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers())) {
     assert(Initializer && "Initializer must be non-null");
     // For a conversion to _Atomic(T) from either T or a class type derived
     // from T, initialize the T object then convert to _Atomic type.
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 96c4ce489fe04..f93313392e3e7 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1822,6 +1822,9 @@ TryImplicitConversion(Sema &S, Expr *From, QualType ToType,
   //   given Conversion rank, in spite of the fact that a copy/move
   //   constructor (i.e., a user-defined conversion function) is
   //   called for those cases.
+  // HLSL:
+  //   A conversion of an expression of class type to the same class
+  //   type needs an implicit lvalue-to-rvalue conversion.
   QualType FromType = From->getType();
   if (ToType->isRecordType() &&
       (S.Context.hasSameUnqualifiedType(FromType, ToType) ||
@@ -1837,6 +1840,9 @@ TryImplicitConversion(Sema &S, Expr *From, QualType ToType,
     // appropriate constructor to copy the returned object, if needed.
     ICS.Standard.CopyConstructor = nullptr;
 
+    if (S.getLangOpts().HLSL)
+      ICS.Standard.First = ICK_Lvalue_To_Rvalue;
+
     // Determine whether this is considered a derived-to-base conversion.
     if (!S.Context.hasSameUnqualifiedType(FromType, ToType))
       ICS.Standard.Second = ICK_Derived_To_Base;
@@ -15517,7 +15523,13 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
   // various built-in candidates, but as DR507 points out, this can lead to
   // problems. So we do it this way, which pretty much follows what GCC does.
   // Note that we go the traditional code path for compound assignment forms.
-  if (Opc == BO_Assign && !Args[0]->getType()->isOverloadableType())
+  // In HLSL, user-defined structs/classes do not have ctors, dtors or
+  // overloadable operators, so we can take this shortcut too.
+  const Type *LHSTy = Args[0]->getType().getTypePtr();
+  if (Opc == BO_Assign &&
+      (!LHSTy->isOverloadableType() ||
+       (getLangOpts().HLSL && LHSTy->isRecordType() &&
+        !LHSTy->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers())))
     return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
 
   // Build the overload set.
diff --git a/clang/test/AST/HLSL/cbuffer.hlsl b/clang/test/AST/HLSL/cbuffer.hlsl
index 487261af19133..0332600a073e1 100644
--- a/clang/test/AST/HLSL/cbuffer.hlsl
+++ b/clang/test/AST/HLSL/cbuffer.hlsl
@@ -151,7 +151,7 @@ cbuffer CB {
   void f() {}
   // CHECK: VarDecl {{.*}} SV 'hlsl_private float' static
   static float SV;
-  // CHECK: VarDecl {{.*}} s7 'EmptyStruct' callinit
+  // CHECK: VarDecl {{.*}} s7 'EmptyStruct'
   EmptyStruct s7;
   // CHECK: VarDecl {{.*}} Buf 'RWBuffer<float>':'hlsl::RWBuffer<float>' callinit
   RWBuffer<float> Buf;
diff --git a/clang/test/AST/HLSL/matrix-constructors.hlsl b/clang/test/AST/HLSL/matrix-constructors.hlsl
index ae61ab49f8573..a00ca1a6e392e 100644
--- a/clang/test/AST/HLSL/matrix-constructors.hlsl
+++ b/clang/test/AST/HLSL/matrix-constructors.hlsl
@@ -335,6 +335,7 @@ float2x3 G = float2x3(float2x2(1,2,3,4), 5, 6);
 
 // CHECK: VarDecl 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}, col:{{[0-9]+}}> col:{{[0-9]+}} N 'float4x4':'matrix<float, 4, 4>' cinit
 // CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}, col:{{[0-9]+}}> 'float4x4':'matrix<float, 4, 4>' functional cast to float4x4 <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}> 'sF' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}> 'sF' lvalue Var 0x{{[0-9a-fA-F]+}} 'f' 'sF'
 struct sF {
     float f[16];
@@ -385,6 +386,7 @@ float2x2 GettingStrange2 = float2x2(s3, s3);
 
 // CHECK: VarDecl 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}, col:{{[0-9]+}}> col:{{[0-9]+}} GettingStrange3 'float2x2':'matrix<float, 2, 2>' cinit
 // CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}, col:{{[0-9]+}}> 'float2x2':'matrix<float, 2, 2>' functional cast to float2x2 <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}> 'S4' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} <col:{{[0-9]+}}> 'S4' lvalue Var 0x{{[0-9a-fA-F]+}} 's4' 'S4'
 struct S4 { float4 f;};
 S4 s4;
diff --git a/clang/test/AST/HLSL/semantic-output-struct-shadow.hlsl b/clang/test/AST/HLSL/semantic-output-struct-shadow.hlsl
index e83901bb17943..def7e12785209 100644
--- a/clang/test/AST/HLSL/semantic-output-struct-shadow.hlsl
+++ b/clang/test/AST/HLSL/semantic-output-struct-shadow.hlsl
@@ -3,9 +3,9 @@
 
 
 // CHECK: CXXRecordDecl {{.*}} referenced struct S definition
-// CHECK: FieldDecl {{.*}} referenced field1 'int'
+// CHECK: FieldDecl {{.*}} field1 'int'
 // CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "A" 0
-// CHECK: FieldDecl {{.*}} referenced field2 'int'
+// CHECK: FieldDecl {{.*}} field2 'int'
 // CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "B" 4
 
 struct S {
diff --git a/clang/test/AST/HLSL/semantic-output-struct.hlsl b/clang/test/AST/HLSL/semantic-output-struct.hlsl
index 727c0f3040641..f43492bae28ff 100644
--- a/clang/test/AST/HLSL/semantic-output-struct.hlsl
+++ b/clang/test/AST/HLSL/semantic-output-struct.hlsl
@@ -3,9 +3,9 @@
 
 
 // CHECK: CXXRecordDecl {{.*}} referenced struct S definition
-// CHECK: FieldDecl {{.*}} referenced field1 'int'
+// CHECK: FieldDecl {{.*}} field1 'int'
 // CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "A" 0
-// CHECK: FieldDecl {{.*}} referenced field2 'int'
+// CHECK: FieldDecl {{.*}} field2 'int'
 // CHECK-NEXT: HLSLParsedSemanticAttr {{.*}} "B" 4
 
 struct S {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index 740b80afdb609..8d5ed58e58d74 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -157,8 +157,8 @@ struct Derived : BFields {
 // flatten from a derived struct with bitfields
 // CHECK-LABEL: call8
 // CHECK: [[A:%.*]] = alloca [4 x i32], align 4
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 0
 // CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 1
 // CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 2
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index e9661a6e2b8be..3be7d791a9fe5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -66,6 +66,8 @@ struct UnnamedDerived : UnnamedOnly {};
 // CHECK-LABEL: define hidden void @_Z5case1v(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case1v.TF1, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -78,6 +80,8 @@ TwoFloats case1() {
 // CHECK-LABEL: define hidden void @_Z5case2v(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case2v.TF2, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -90,7 +94,9 @@ TwoFloats case2() {
 // CHECK-LABEL: define hidden void @_Z5case3i(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[VAL:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
@@ -110,7 +116,9 @@ TwoFloats case3(int Val) {
 // CHECK-LABEL: define hidden void @_Z5case4Dv2_i(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 4
@@ -133,7 +141,9 @@ TwoFloats case4(int2 TwoVals) {
 // CHECK-LABEL: define hidden void @_Z5case5Dv2_i(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 4
@@ -153,10 +163,14 @@ TwoInts case5(int2 TwoVals) {
 // Case 6: Initialization from a scalarized structure of different type with
 // different element types.
 // CHECK-LABEL: define hidden void @_Z5case69TwoFloats(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TF4:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF4_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TF4]], ptr [[TF4_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[TF4]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 1
 // CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 // CHECK-NEXT:    store i32 [[CONV]], ptr [[Z]], align 1
@@ -175,12 +189,26 @@ TwoInts case6(TwoFloats TF4) {
 // Case 7: Initialization of a complex structure, with bogus braces and element
 // conversions from a collection of scalar values, and structures.
 // CHECK-LABEL: define hidden void @_Z5case77TwoIntsS_i9TwoFloatsS0_S0_S0_(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_DOGGO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI1:%.*]], ptr noundef byval([[STRUCT_TWOINTS]]) align 1 [[TI2:%.*]], i32 noundef [[VAL:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF3:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_DOGGO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TI1:%.*]], ptr noundef dead_on_return [[TI2:%.*]], i32 noundef [[VAL:%.*]], ptr noundef dead_on_return [[TF1:%.*]], ptr noundef dead_on_return [[TF2:%.*]], ptr noundef dead_on_return [[TF3:%.*]], ptr noundef dead_on_return [[TF4:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TI1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TI2_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TF1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF2_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF3_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF4_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TI1]], ptr [[TI1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TI2]], ptr [[TI2_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF1]], ptr [[TF1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF2]], ptr [[TF2_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF3]], ptr [[TF3_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF4]], ptr [[TF4_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI1]], i32 0, i32 0
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS:%.*]], ptr [[TI1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI1]], i32 0, i32 1
@@ -201,7 +229,7 @@ TwoInts case6(TwoFloats TF4) {
 // CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT]], align 1
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
-// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[TF1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[X]], align 1
 // CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 1
@@ -239,10 +267,14 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
 // Case 8: Initialization of a structure from a different structure with
 // significantly different element types and grouping.
 // CHECK-LABEL: define hidden void @_Z5case85Doggo(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ANIMALBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ANIMALBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[D1:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[D1]], ptr [[D1_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
+// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO:%.*]], ptr [[D1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[LEGSTATE]], align 1
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i64 0
 // CHECK-NEXT:    store i32 [[VECEXT]], ptr [[LEGS]], align 1
@@ -325,10 +357,16 @@ AnimalBits case8(Doggo D1) {
 // structures from different layouts, different component groupings, with no
 // top-level bracing separation.
 // CHECK-LABEL: define hidden void @_Z5case95Doggo10AnimalBits(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ZOO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]], ptr noundef byval([[STRUCT_ANIMALBITS:%.*]]) align 1 [[A1:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ZOO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[D1:%.*]], ptr noundef dead_on_return [[A1:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[A1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[D1]], ptr [[D1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[A1]], ptr [[A1_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[DOGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ZOO]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[DOGS]], i32 0, i32 0
+// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO:%.*]], ptr [[DOGS]], i32 0, i32 0
 // CHECK-NEXT:    [[LEGSTATE1:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[LEGSTATE1]], align 1
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i64 0
@@ -400,7 +438,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    store <4 x float> [[VECINIT43]], ptr [[ARRAYINIT_ELEMENT]], align 1
 // CHECK-NEXT:    [[ARRAYINIT_ELEMENT44:%.*]] = getelementptr inbounds [[STRUCT_DOGGO]], ptr [[DOGS]], i32 1
 // CHECK-NEXT:    [[LEGSTATE45:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 0
-// CHECK-NEXT:    [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 0
+// CHECK-NEXT:    [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS:%.*]], ptr [[A1]], i32 0, i32 0
 // CHECK-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[LEGS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX46]], align 1
 // CHECK-NEXT:    [[VECINIT47:%.*]] = insertelement <4 x i32> poison, i32 [[TMP14]], i32 0
@@ -741,9 +779,15 @@ Zoo case9(Doggo D1, AnimalBits A1) {
 
 // Case 10: Initialize an object with a base class from two objects.
 // CHECK-LABEL: define hidden void @_Z6case109TwoFloatsS_(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TF1:%.*]], ptr noundef dead_on_return [[TF2:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF2_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TF1]], ptr [[TF1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF2]], ptr [[TF2_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X1]], align 1
 // CHECK-NEXT:    store float [[TMP0]], ptr [[X]], align 1
@@ -770,11 +814,13 @@ FourFloats case10(TwoFloats TF1, TwoFloats TF2) {
 // CHECK-LABEL: define hidden void @_Z6case11f(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[REF_TMP1:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[REF_TMP4:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[REF_TMP7:%.*]] = alloca <4 x float>, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
@@ -819,8 +865,10 @@ FourFloats case11(float F) {
 // CHECK-LABEL: define hidden void @_Z6case12ii(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[J_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[J]], ptr [[J_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
@@ -839,9 +887,13 @@ SlicyBits case12(int I, int J) {
 
 // Case 13: Initialize bitfield from a struct of two ints.
 // CHECK-LABEL: define hidden void @_Z6case137TwoInts(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TI:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TI_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TI]], ptr [[TI_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS:%.*]], ptr [[TI]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
 // CHECK-NEXT:    store i8 [[TMP1]], ptr [[AGG_RESULT]], align 1
@@ -859,14 +911,18 @@ SlicyBits case13(TwoInts TI) {
 
 // Case 14: Initialize struct of ints from struct with bitfields.
 // CHECK-LABEL: define hidden void @_Z6case149SlicyBits(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[SB:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[SB_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[SB]], ptr [[SB_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
 // CHECK-NEXT:    store i32 [[BF_CAST]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 1
-// CHECK-NEXT:    [[W1:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
+// CHECK-NEXT:    [[W1:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS:%.*]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD2:%.*]] = load i8, ptr [[W1]], align 1
 // CHECK-NEXT:    [[BF_CAST3:%.*]] = sext i8 [[BF_LOAD2]] to i32
 // CHECK-NEXT:    store i32 [[BF_CAST3]], ptr [[W]], align 1
@@ -879,15 +935,19 @@ TwoInts case14(SlicyBits SB) {
 
 // Case 15: Initialize struct of floats from struct with bitfields.
 // CHECK-LABEL: define hidden void @_Z6case159SlicyBits(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[SB:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[SB_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[SB]], ptr [[SB_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
 // CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
-// CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
+// CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS:%.*]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
 // CHECK-NEXT:    [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
 // CHECK-NEXT:    [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
@@ -904,7 +964,9 @@ TwoFloats case15(SlicyBits SB) {
 // CHECK-LABEL: define hidden void @_Z7makeTwoRf(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noalias noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[X_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store ptr [[X]], ptr [[X_ADDR]], align 4
 // CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]]
@@ -930,9 +992,11 @@ TwoFloats makeTwo(inout float X) {
 // CHECK-LABEL: define hidden void @_Z6case16v(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[X:%.*]] = alloca float, align 4
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_TWOFLOATS:%.*]], align 1
 // CHECK-NEXT:    [[TMP:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store float 0.000000e+00, ptr [[X]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 4
 // CHECK-NEXT:    store float [[TMP0]], ptr [[TMP]], align 4
@@ -1002,11 +1066,13 @@ void case18() {
 
 // InitList with Struct with unnamed bitfield on RHS
 // CHECK-LABEL: define hidden void @_Z6case197Unnamed(
-// CHECK-SAME: ptr noundef byval([[STRUCT_UNNAMED:%.*]]) align 1 [[U:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[U:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
-// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_UNNAMED]], ptr [[U]], i32 0, i32 0
+// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_UNNAMED:%.*]], ptr [[U]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 1
 // CHECK-NEXT:    store i32 [[TMP0]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 1
@@ -1031,9 +1097,11 @@ void case20() {
 
 // InitList with Empty Struct on RHS
 // CHECK-LABEL: define hidden void @_Z6case215Empty(
-// CHECK-SAME: ptr noundef byval([[STRUCT_EMPTY:%.*]]) align 1 [[E:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[E:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[E_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[E]], ptr [[E_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI]], ptr align 1 @__const._Z6case215Empty.TI, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -1055,9 +1123,11 @@ void case22() {
 
 // InitList with Struct with only unnamed bitfield on RHS
 // CHECK-LABEL: define hidden void @_Z6case2311UnnamedOnly(
-// CHECK-SAME: ptr noundef byval([[STRUCT_UNNAMEDONLY:%.*]]) align 1 [[UO:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[UO:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[UO_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[UO]], ptr [[UO_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI]], ptr align 1 @__const._Z6case2311UnnamedOnly.TI, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -1082,10 +1152,14 @@ void case24() {
 }
 
 // CHECK-LABEL: define hidden void @_Z6case2512EmptyDerived14UnnamedDerived(
-// CHECK-SAME: ptr noundef byval([[STRUCT_EMPTYDERIVED:%.*]]) align 1 [[ED:%.*]], ptr noundef byval([[STRUCT_UNNAMEDDERIVED:%.*]]) align 1 [[UD:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[ED:%.*]], ptr noundef dead_on_return [[UD:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ED_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[UD_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI1:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
 // CHECK-NEXT:    [[TI2:%.*]] = alloca [[STRUCT_TWOINTS]], align 1
+// CHECK-NEXT:    store ptr [[ED]], ptr [[ED_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[UD]], ptr [[UD_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI1]], ptr align 1 @__const._Z6case2512EmptyDerived14UnnamedDerived.TI1, i32 8, i1 false)
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI2]], ptr align 1 @__const._Z6case2512EmptyDerived14UnnamedDerived.TI2, i32 8, i1 false)
 // CHECK-NEXT:    ret void
@@ -1096,11 +1170,13 @@ void case25(EmptyDerived ED, UnnamedDerived UD) {
 }
 
 // CHECK-LABEL: define hidden void @_Z6case267TwoInts(
-// CHECK-SAME: ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[TI:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TI_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[F:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[F2:%.*]] = alloca <3 x float>, align 4
-// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
+// CHECK-NEXT:    store ptr [[TI]], ptr [[TI_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS:%.*]], ptr [[TI]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
 // CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x float> poison, float [[CONV]], i32 0
@@ -1134,9 +1210,11 @@ struct CustomResource {
 };
 
 // CHECK-LABEL: define hidden void @_Z6case2714CustomResource(
-// CHECK-SAME: ptr noundef byval([[STRUCT_CUSTOMRESOURCE:%.*]]) align 1 [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_CUSTOMRESOURCE]], align 1
+// CHECK-NEXT:    [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_CUSTOMRESOURCE:%.*]], align 1
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[H:%.*]] = getelementptr inbounds nuw [[STRUCT_CUSTOMRESOURCE]], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT:    [[H1:%.*]] = getelementptr inbounds nuw [[STRUCT_CUSTOMRESOURCE]], ptr [[A]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load target("dx.TypedBuffer", float, 1, 0, 0), ptr [[H1]], align 1
@@ -1150,11 +1228,13 @@ void case27(CustomResource a) {
 // Check cases with explicit casts
 
 // CHECK-LABEL: define hidden void @_Z6case289TwoFloats(
-// CHECK-SAME: ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[TF:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TF_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_TWOINTS]], align 1
-// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_TWOFLOATS]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_TWOFLOATS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[TF]], ptr [[TF_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[TF]], i32 8, i1 false)
 // CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TWOINTS]], ptr [[REF_TMP]], i32 0, i32 0
 // CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_TWOINTS]], ptr [[REF_TMP]], i32 0, i32 1
@@ -1172,8 +1252,8 @@ void case27(CustomResource a) {
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 1
 // CHECK-NEXT:    [[W6:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[W6]], align 1
-// CHECK-NEXT:    store i32 [[TMP5]], ptr [[W]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[W6]], align 1
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[W]], align 1
 // CHECK-NEXT:    ret void
 //
 void case28(TwoFloats TF) {
@@ -1181,11 +1261,13 @@ void case28(TwoFloats TF) {
 }
 
 // CHECK-LABEL: define hidden void @_Z6case2910FourFloats(
-// CHECK-SAME: ptr noundef byval([[STRUCT_FOURFLOATS:%.*]]) align 1 [[FF:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[FF:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FF_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[INTS:%.*]] = alloca [2 x i32], align 4
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [2 x i32], align 4
-// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_FOURFLOATS]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_FOURFLOATS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[FF]], ptr [[FF_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[FF]], i32 16, i1 false)
 // CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[REF_TMP]], i32 0, i32 0
 // CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x i32], ptr [[REF_TMP]], i32 0, i32 1
@@ -1204,8 +1286,8 @@ void case28(TwoFloats TF) {
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[INTS]], align 4
 // CHECK-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds i32, ptr [[INTS]], i32 1
 // CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
-// CHECK-NEXT:    store i32 [[TMP5]], ptr [[ARRAYINIT_ELEMENT]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[ARRAYINIT_ELEMENT]], align 4
 // CHECK-NEXT:    ret void
 //
 void case29(FourFloats FF) {
@@ -1239,8 +1321,8 @@ void case29(FourFloats FF) {
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 1
 // CHECK-NEXT:    [[W8:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[W8]], align 1
-// CHECK-NEXT:    store i32 [[TMP5]], ptr [[W]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[W8]], align 1
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[W]], align 1
 // CHECK-NEXT:    ret void
 //
 void case30() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
index dd9dd706aae26..a717ba5d6b2f1 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -165,11 +165,13 @@ struct Derived : BFields {
 };
 
 // CHECK-LABEL: define hidden void @_Z5call47Derived(
-// CHECK-SAME: ptr noundef byval([[STRUCT_DERIVED:%.*]]) align 1 [[D:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[D:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[A:%.*]] = alloca [2 x <2 x i32>], align 4
-// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED:%.*]], align 1
 // CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <4 x i32>, align 4
+// CHECK-NEXT:    store ptr [[D]], ptr [[D_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[D]], i32 19, i1 false)
 // CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0
 // CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS:%.*]], ptr [[GEP]], i32 0, i32 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index ab5873bfa8296..3b7111ee2fd02 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -152,8 +152,11 @@ struct Derived : BFields {
 
 // Derived Struct truncate to scalar
 // CHECK-LABEL: call9
-// CHECK: [[D2:%.*]] = alloca double, align 8
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:  [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[D2:%.*]] = alloca double, align 8
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: store ptr %D, ptr [[D_INDIRECT_ADDR]], align 4
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
@@ -200,8 +203,11 @@ export void call10(int4 I) {
 
 // truncate derived struct
 // CHECK-LABEL: call11
-// CHECK: [[B:%.*]] = alloca %struct.BFields, align 1
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:  [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[B:%.*]] = alloca %struct.BFields, align 1
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: store ptr %D, ptr [[D_INDIRECT_ADDR]], align 4
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[D]], i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0
 // CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index ad227f9fe825e..31263bca443a6 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -94,9 +94,12 @@ struct Derived : BFields {
 
 // vector flat cast from derived struct with bitfield
 // CHECK-LABEL: call6
-// CHECK: [[A:%.*]] = alloca <4 x i32>, align 4
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[A:%.*]] = alloca <4 x i32>, align 4
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
 // CHECK-NEXT: [[FlatTmp:%.*]] = alloca <4 x i32>, align 4
+// CHECK-NEXT: store ptr %D, ptr [[D_INDIRECT_ADDR]], align 4
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
@@ -217,9 +220,12 @@ struct BoolVecStruct {
 
 // vector flat cast from struct containing bool vector
 // CHECK-LABEL: call10
-// CHECK:    [[V:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:  [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V:%.*]] = alloca <2 x i32>, align 4
 // CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
 // CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 4
+// CHECK-NEXT:    store ptr %s, ptr [[S_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false)
 // CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr inbounds %struct.BoolVecStruct, ptr [[AGG_TEMP]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 4
diff --git a/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl b/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
index 0eb36ce8fb7bb..a98f231014797 100644
--- a/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
@@ -11,9 +11,9 @@ struct CustomResource {
 // CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", %struct.MyStruct, 0, 0)
 // CHECK: %struct.MyStruct = type { <4 x float>, <2 x i32> }
 
-// CHECK: define hidden void @_Z2fa14CustomResource(ptr noundef byval(%struct.CustomResource) align 1 %a)
-// CHECK: call void @_Z4foo114CustomResource(ptr noundef byval(%struct.CustomResource) align 1 %agg.tmp)
-// CHECK: declare hidden void @_Z4foo114CustomResource(ptr noundef byval(%struct.CustomResource) align 1)
+// CHECK: define hidden void @_Z2fa14CustomResource(ptr noundef dead_on_return %a)
+// CHECK: call void @_Z4foo114CustomResource(ptr noundef dead_on_return %byval-temp)
+// CHECK: declare hidden void @_Z4foo114CustomResource(ptr noundef dead_on_return)
 
 void foo1(CustomResource res);
 
@@ -21,7 +21,7 @@ void fa(CustomResource a) {
     foo1(a);
 }
 
-// CHECK: define hidden void @_Z2fb14CustomResource(ptr noundef byval(%struct.CustomResource) align 1 %a)
+// CHECK: define hidden void @_Z2fb14CustomResource(ptr noundef dead_on_return %a)
 void fb(CustomResource a) {
     CustomResource b = a;
 }
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
index 2e6a7ef86c610..4e1c1b7b55984 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
@@ -18,6 +18,8 @@ RasterizerOrderedStructuredBuffer<int> Out2;
 void main(unsigned GI : SV_GroupIndex) {
   // CHECK: define void @main()
 
+  // CHECK: %[[TMP:.*]] = alloca %struct.S, align 1
+
   // DXIL: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_0_0t.i32(target("dx.RawBuffer", i32, 0, 0) %{{.*}}, i32 %{{.*}})
   // SPV: %[[INPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_0t.i32(target("spirv.VulkanBuffer", [0 x i32], 12, 0) %{{.*}}, i32 %{{.*}})
   // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
@@ -34,18 +36,14 @@ void main(unsigned GI : SV_GroupIndex) {
   Out2[GI] = In[GI];
 #endif
 
-  // For SPIR-V, the addrspacecast comes from `S::operator=` member function, which expects
-  // parameters in address space 0. This is why hlsl_device is a sub address
-  // space of the default address space.
-  // SPV: %[[INPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
-  // SPV: %[[INCAST:.*]] = addrspacecast ptr addrspace(11) %[[INPTR]] to ptr
   // SPV: %[[OUTPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
-  // SPV: %[[OUTCAST:.*]] = addrspacecast ptr addrspace(11) %[[OUTPTR]] to ptr
-  // SPV: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[OUTCAST]], ptr align 1 %[[INCAST]], i64 4, i1 false)
+  // SPV: %[[INPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
+  // SPV: call void @llvm.memcpy.p11.p11.i64(ptr addrspace(11) align 1 %[[OUTPTR]], ptr addrspace(11) align 1 %[[INPTR]], i64 4, i1 false)
+  // SPV: call void @llvm.memcpy.p0.p11.i64(ptr align 1 %[[TMP]], ptr addrspace(11) align 1 %[[OUTPTR]], i64 4, i1 false)
 
-  // For DXIL, hlsl_device and the default address space map to the same target address space. No need for an address space cast.
-  // DXIL: %[[INPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
   // DXIL: %[[OUTPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
+  // DXIL: %[[INPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
   // DXIL: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %[[OUTPTR]], ptr align 1 %[[INPTR]], i32 4, i1 false)
+  // DXIL: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %[[TMP]], ptr align 1 %[[OUTPTR]], i32 4, i1 false)
   RWSB3[0] = RWSB3[1];
 }
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
index 506de82412f8a..2f7dc02c6d7f2 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
@@ -1,38 +1,102 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK-DXIL
-// RUN: %clang_cc1 -triple spirv-pc-vulkan1.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK-SPIR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | \
+// RUN:     FileCheck %s -DCONST_ADDR_SPACE=2 -DPADDING_TYPE="dx.Padding"
 
-struct S {
+// RUN: %clang_cc1 -triple spirv-pc-vulkan1.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | \
+// RUN:     FileCheck %s -DCONST_ADDR_SPACE=12 -DPADDING_TYPE="spirv.Padding" --check-prefixes=CHECK,SPIRV
+
+struct P {
   float3 a;
+};
+
+struct S : P {
   float4 b;
 };
-// CHECK-DXIL-DAG: %S = type <{ <3 x float>, target("dx.Padding", 4), <4 x float> }>
-// CHECK-DXIL-DAG: %struct.S = type { <3 x float>, <4 x float> }
-// CHECK-SPIR-DAG: %S = type <{ <3 x float>, target("spirv.Padding", 4), <4 x float> }>
-// CHECK-SPIR-DAG: %struct.S = type { <3 x float>, <4 x float> }
+
+// CHECK-DAG: %S = type <{ <3 x float>, target("[[PADDING_TYPE]]", 4), <4 x float> }>
+// CHECK-DAG: %struct.P = type { <3 x float> }
+// CHECK-DAG: %struct.S = type { %struct.P, <4 x float> }
 
 cbuffer CB {
   S cbs;
 };
-// CHECK-DXIL-DAG: @cbs = external hidden addrspace(2) global %S, align 1
-// CHECK-SPIR-DAG: @cbs = external hidden addrspace(12) global %S, align 1
-
-void main() {
-  S tmp = (S)cbs;
-// CHECK-DXIL: %agg-temp = alloca %struct.S, align 1
-// CHECK-DXIL: %[[#DST:]] = getelementptr inbounds %struct.S, ptr %agg-temp, i32 0, i32 0
-// CHECK-DXIL: %cbuf.load = load <3 x float>, ptr addrspace(2) @cbs, align 4
-// CHECK-DXIL: store <3 x float> %cbuf.load, ptr %[[#DST]], align 4
-
-// CHECK-DXIL: %[[#DST:]] = getelementptr inbounds %struct.S, ptr %agg-temp, i32 0, i32 1
-// CHECK-DXIL: %cbuf.load1 = load <4 x float>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @cbs, i32 16), align 4
-// CHECK-DXIL: store <4 x float> %cbuf.load1, ptr %[[#DST]], align 4
-
-// CHECK-SPIR: %agg-temp = alloca %struct.S, align 1
-// CHECK-SPIR: %[[#DST:]] = getelementptr inbounds %struct.S, ptr %agg-temp, i32 0, i32 0
-// CHECK-SPIR: %cbuf.load = load <3 x float>, ptr addrspace(12) @cbs, align 4
-// CHECK-SPIR: store <3 x float> %cbuf.load, ptr %[[#DST]], align 4
-
-// CHECK-SPIR: %[[#DST:]] = getelementptr inbounds %struct.S, ptr %agg-temp, i32 0, i32 1
-// CHECK-SPIR: %cbuf.load1 = load <4 x float>, ptr addrspace(12) getelementptr inbounds nuw (i8, ptr addrspace(12) @cbs, i64 16), align 4
-// CHECK-SPIR: store <4 x float> %cbuf.load1, ptr %[[#DST]], align 4
+// CHECK-DAG: @cbs = external hidden addrspace([[CONST_ADDR_SPACE]]) global %S, align 1
+
+// CHECK-LABEL: case1
+// CHECK-NEXT: entry:
+// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+//
+// Copy S field by field into a local variable in default address space.
+//
+// CHECK-NEXT: [[LocalS:%.*]] = alloca %struct.S, align 1
+// CHECK-NEXT: [[PtrA:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 0
+// CHECK-NEXT: [[CBufLoad1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
+// CHECK-NEXT: store <3 x float> [[CBufLoad1]], ptr [[PtrA]], align 4
+// CHECK-NEXT: [[PtrB:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 1
+// CHECK-NEXT: [[CBufLoad2:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 4
+// CHECK-NEXT: store <4 x float> [[CBufLoad2]], ptr [[PtrB]], align 4
+// CHECK-NEXT: ret void
+void case1() {
+  S local = cbs;
+}
+
+// CHECK-LABEL: case2
+// CHECK-NEXT: entry:
+// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+//
+// Copy S field by field into a temporary variable in default address space.
+//
+// CHECK-NEXT: [[LocalS:%.*]] = alloca %struct.S, align 1
+// CHECK-NEXT: [[AggTemp:%.*]] = alloca %struct.S, align 1
+// CHECK-NEXT: [[PtrA:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
+// CHECK-NEXT: [[CBufLoad1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
+// CHECK-NEXT: store <3 x float> [[CBufLoad1]], ptr [[PtrA]], align 4
+// CHECK-NEXT: [[PtrB:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
+// CHECK-NEXT: [[CBufLoad2:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 4
+// CHECK-NEXT: store <4 x float> [[CBufLoad2]], ptr [[PtrB]], align 4
+//
+// Then process the HLSLElementwiseCast - copy individual vector elements between the structs.
+//
+// CHECK-NEXT: [[VecGep1:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 0
+// CHECK-NEXT: [[VecGep2:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 1
+// CHECK-NEXT: [[VecGep3:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
+// CHECK-NEXT: [[VecGep4:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
+
+// CHECK-NEXT: [[VecA1:%.*]] = load <3 x float>, ptr [[VecGep3]], align 4
+// CHECK-NEXT: [[Val1:%.*]] = extractelement <3 x float> [[VecA1]], i32 0
+// CHECK-NEXT: [[VecA1Ptr:%.*]] = getelementptr <3 x float>, ptr [[VecGep1]], i32 0, i32 0
+// CHECK-NEXT: store float [[Val1]], ptr [[VecA1Ptr]], align 4
+
+// CHECK-NEXT: [[VecA2:%.*]] = load <3 x float>, ptr [[VecGep3]], align 4
+// CHECK-NEXT: [[Val2:%.*]] = extractelement <3 x float> [[VecA2]], i32 1
+// CHECK-NEXT: [[VecA2Ptr:%.*]] = getelementptr <3 x float>, ptr [[VecGep1]], i32 0, i32 1
+// CHECK-NEXT: store float [[Val2]], ptr [[VecA2Ptr]], align 4
+
+// CHECK-NEXT: [[VecA3:%.*]] = load <3 x float>, ptr [[VecGep3]], align 4
+// CHECK-NEXT: [[Val3:%.*]] = extractelement <3 x float> [[VecA3]], i32 2
+// CHECK-NEXT: [[VecA3Ptr:%.*]] = getelementptr <3 x float>, ptr [[VecGep1]], i32 0, i32 2
+// CHECK-NEXT: store float [[Val3]], ptr [[VecA3Ptr]], align 4
+
+// CHECK-NEXT: [[VecB1:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
+// CHECK-NEXT: [[Val4:%.*]] = extractelement <4 x float> [[VecB1]], i32 0
+// CHECK-NEXT: [[VecB1Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 0
+// CHECK-NEXT: store float [[Val4]], ptr [[VecB1Ptr]], align 4
+
+// CHECK-NEXT: [[VecB2:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
+// CHECK-NEXT: [[Val5:%.*]] = extractelement <4 x float> [[VecB2]], i32 1
+// CHECK-NEXT: [[VecB2Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 1
+// CHECK-NEXT: store float [[Val5]], ptr [[VecB2Ptr]], align 4
+
+// CHECK-NEXT: [[VecB3:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
+// CHECK-NEXT: [[Val6:%.*]] = extractelement <4 x float> [[VecB3]], i32 2
+// CHECK-NEXT: [[VecB3Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 2
+// CHECK-NEXT: store float [[Val6]], ptr [[VecB3Ptr]], align 4
+
+// CHECK-NEXT: [[VecB4:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
+// CHECK-NEXT: [[Val7:%.*]] = extractelement <4 x float> [[VecB4]], i32 3
+// CHECK-NEXT: [[VecB4Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 3
+// CHECK-NEXT: store float [[Val7]], ptr [[VecB4Ptr]], align 4
+
+// CHECK-NEXT: ret void
+void case2() {
+  S localS = (S)cbs;
 }
diff --git a/clang/test/CodeGenHLSL/resources/resources-in-structs.hlsl b/clang/test/CodeGenHLSL/resources/resources-in-structs.hlsl
index 3502f1d7eca66..bf4c2f5558acf 100644
--- a/clang/test/CodeGenHLSL/resources/resources-in-structs.hlsl
+++ b/clang/test/CodeGenHLSL/resources/resources-in-structs.hlsl
@@ -47,7 +47,7 @@ C c : register(t10);
 
 // Check that c.BufOne is initialized from binding with counter
 //
-// CHECK: define internal void @__cxx_global_var_init.3()
+// CHECK: define internal void @__cxx_global_var_init.{{[0-9]+}}()
 // CHECK-NEXT: entry:
 // CHECK-NEXT: call void @hlsl::StructuredBuffer<float>::__createFromBinding(unsigned int, unsigned int, int, unsigned int, char const*)
 // CHECK-SAME: (ptr dead_on_unwind writable sret(%"class.hlsl::StructuredBuffer") align 4 @c.BufOne, i32 noundef 16, i32 noundef 0, i32 noundef 1, i32 noundef 0, ptr noundef @[[cBufOne]])
diff --git a/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl b/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl
index 2f8dc97ef762e..e50044811017a 100644
--- a/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl
+++ b/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl
@@ -14,8 +14,8 @@ struct Output {
 
 // Make sure SV_DispatchThreadID translated into dx.thread.id.
 
-// CHECK-DX: define hidden void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef byval(%struct.Input) align 1 %input)
-// CHECK-VK: define hidden spir_func void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef byval(%struct.Input) align 1 %input)
+// CHECK-DX: define hidden void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef dead_on_return %input)
+// CHECK-VK: define hidden spir_func void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef dead_on_return %input)
 
 // CHECK: %Idx = getelementptr inbounds nuw %struct.Input, ptr %input, i32 0, i32 0
 // CHECK: %[[#tmp:]] = load float, ptr %Idx, align 1
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index a2df307038774..5cbd7af9b91cd 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -6,10 +6,10 @@ struct Pair {
   int getFirst() {
     Pair Another = {5, 10};
     this = Another;
-      return this.First;
+    return this.First;
   }
   int getSecond() {
-    this = Pair();
+    this = {0, 123};
     return Second;
   }
   void operator=(Pair P) {
@@ -26,30 +26,30 @@ void main() {
 
 // This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
 // CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
-// CHECK-NEXT:entry:
-// CHECK-NEXT:%this.addr = alloca ptr, align 4
-// CHECK-NEXT:%Another = alloca %struct.Pair, align 1
-// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 1
-// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
-// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
-// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 0
-// CHECK-NEXT:store i32 5, ptr %First, align 1
-// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 1
-// CHECK-NEXT:store i32 10, ptr %Second, align 1
-// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %agg.tmp, ptr align 1 %Another, i32 8, i1 false)
-// CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 1 %agg.tmp)
-// CHECK-NEXT:%First2 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
-// CHECK-NEXT:%0 = load i32, ptr %First2, align 1
-// CHECK-NEXT:ret i32 %0
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[Another:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Another]], ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[Another]], i32 8, i1 false)
+// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
+// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
+// CHECK-NEXT: %0 = load i32, ptr [[First]], align 1
+// CHECK-NEXT: ret i32 %0
 
 // CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
-// CHECK-NEXT:%this.addr = alloca ptr, align 4
-// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 1
-// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
-// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
-// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 1 %agg.tmp, i8 0, i32 8, i1 false)
-// CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 1 %agg.tmp)
-// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
-// CHECK-NEXT:%0 = load i32, ptr %Second, align 1
-// CHECK-NEXT:ret i32 %0
+// CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: store i32 0, ptr [[First]], align 1
+// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: store i32 123, ptr [[Second]], align 1
+// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
+// CHECK-NEXT: [[Second2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
+// CHECK-NEXT: %0 = load i32, ptr [[Second2]], align 1
+// CHECK-NEXT: ret i32 %0
diff --git a/clang/test/CodeGenHLSL/this-assignment.hlsl b/clang/test/CodeGenHLSL/this-assignment.hlsl
index efccc96499242..8fb0b05737b2e 100644
--- a/clang/test/CodeGenHLSL/this-assignment.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment.hlsl
@@ -12,7 +12,7 @@ struct Pair {
 
   // In HLSL 202x, this is a move assignment rather than a copy.
   int getSecond() {
-    this = Pair();
+    this = {0, 123};
     return Second;
   }
 
@@ -34,31 +34,42 @@ void main() {
 
 // This tests reference like implicit this in HLSL
 // CHECK-LABEL:     define {{.*}}getFirst
-// CHECK-NEXT:entry:
-// CHECK-NEXT:%this.addr = alloca ptr, align 4
-// CHECK-NEXT:%Another = alloca %struct.Pair, align 1
-// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
-// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
-// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %Another, ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
-// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %this1, ptr align 1 %Another, i32 8, i1 false)
-// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[Another:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Another]], ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[ThisPtr]], ptr align 1 [[Another]], i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[ThisPtr]], i32 8, i1 false)
+// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
 
 // CHECK-LABEL:     define {{.*}}getSecond
-// CHECK-NEXT:entry:
-// CHECK-NEXT:%this.addr = alloca ptr, align 4
-// CHECK-NEXT:%ref.tmp = alloca %struct.Pair, align 1
-// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
-// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
-// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 1 %ref.tmp, i8 0, i32 8, i1 false)
-// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %this1, ptr align 1 %ref.tmp, i32 8, i1 false)
-// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAddr]], align 4
+// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
+// CHECK-NEXT: store i32 0, ptr [[First]], align 1
+// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
+// CHECK-NEXT: store i32 123, ptr [[Second]], align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[ThisPtr]], i32 8, i1 false)
+// CHECK-NEXT: [[Second2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
 
 // CHECK-LABEL:     define {{.*}}DoSilly
 // CHECK-NEXT:entry:
-// CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr
+// CHECK-NEXT: [[ResultPtr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[ObjIndirectAddr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: store ptr {{.*}}, ptr [[ResultPtr]]
 // CHECK-NEXT: store ptr {{.*}}, ptr [[ThisPtrAddr]]
+// CHECK-NEXT: store ptr {{.*}}, ptr [[ObjIndirectAddr]]
 // CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAddr]]
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[ThisPtr]], ptr align 1 [[Obj:%.*]], i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[ThisPtr]], i32 8, i1 false)
 // CHECK-NEXT: [[FirstAddr:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
 // CHECK-NEXT: [[First:%.*]] = load i32, ptr [[FirstAddr]]
 // CHECK-NEXT: [[FirstPlusTwo:%.*]] = add nsw i32 [[First]], 2
diff --git a/clang/test/SemaHLSL/BuiltIns/WaveActiveAllTrue-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/WaveActiveAllTrue-errors.hlsl
index b0d0fdfca5e18..cb680d01afb0f 100644
--- a/clang/test/SemaHLSL/BuiltIns/WaveActiveAllTrue-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/WaveActiveAllTrue-errors.hlsl
@@ -17,5 +17,5 @@ struct Foo
 
 bool test_type_check(Foo p0) {
   return __builtin_hlsl_wave_active_all_true(p0);
-  // expected-error at -1 {{no viable conversion from 'Foo' to 'bool'}}
+  // expected-error at -1 {{cannot initialize a parameter of type 'bool' with an lvalue of type 'Foo'}}
 }
diff --git a/clang/test/SemaHLSL/BuiltIns/WaveActiveAnyTrue-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/WaveActiveAnyTrue-errors.hlsl
index 875aae0651702..85f0719a8a7a7 100644
--- a/clang/test/SemaHLSL/BuiltIns/WaveActiveAnyTrue-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/WaveActiveAnyTrue-errors.hlsl
@@ -17,5 +17,5 @@ struct Foo
 
 bool test_type_check(Foo p0) {
   return __builtin_hlsl_wave_active_any_true(p0);
-  // expected-error at -1 {{no viable conversion from 'Foo' to 'bool'}}
+  // expected-error at -1 {{cannot initialize a parameter of type 'bool' with an lvalue of type 'Foo'}}
 }
diff --git a/clang/test/SemaHLSL/BuiltIns/WaveActiveBallot-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/WaveActiveBallot-errors.hlsl
index cb9e69bc89c26..802226a935596 100644
--- a/clang/test/SemaHLSL/BuiltIns/WaveActiveBallot-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/WaveActiveBallot-errors.hlsl
@@ -17,5 +17,5 @@ struct Foo
 
 uint4 test_type_check(Foo p0) {
   return __builtin_hlsl_wave_active_ballot(p0);
-  // expected-error at -1 {{no viable conversion from 'Foo' to 'bool'}}
+  // expected-error at -1 {{cannot initialize a parameter of type 'bool' with an lvalue of type 'Foo'}}
 }
diff --git a/clang/test/SemaHLSL/BuiltIns/WaveActiveCountBits-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/WaveActiveCountBits-errors.hlsl
index 02f45eb30b377..da159c5ebd102 100644
--- a/clang/test/SemaHLSL/BuiltIns/WaveActiveCountBits-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/WaveActiveCountBits-errors.hlsl
@@ -14,5 +14,5 @@ struct S { float f; };
 
 int test_bad_conversion(S x) {
   return __builtin_hlsl_wave_active_count_bits(x);
-  // expected-error at -1 {{no viable conversion from 'S' to 'bool'}}
+  // expected-error at -1 {{cannot initialize a parameter of type 'bool' with an lvalue of type 'S'}}
 }
diff --git a/clang/test/SemaHLSL/BuiltIns/dot4add_i8packed-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/dot4add_i8packed-errors.hlsl
index ac0b430bfaf94..58956d1c82cbe 100644
--- a/clang/test/SemaHLSL/BuiltIns/dot4add_i8packed-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/dot4add_i8packed-errors.hlsl
@@ -24,5 +24,5 @@ struct S { float f; };
 
 int test_expr_struct_type_check(S p0, int p1) {
   return __builtin_hlsl_dot4add_i8packed(p0, p1, p1);
-  // expected-error at -1 {{no viable conversion from 'S' to 'unsigned int'}}
+  // expected-error at -1 {{cannot initialize a parameter of type 'unsigned int' with an lvalue of type 'S'}}
 }
diff --git a/clang/test/SemaHLSL/BuiltIns/dot4add_u8packed-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/dot4add_u8packed-errors.hlsl
index f1fa41902b968..cc6441cc88be3 100644
--- a/clang/test/SemaHLSL/BuiltIns/dot4add_u8packed-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/dot4add_u8packed-errors.hlsl
@@ -24,5 +24,5 @@ struct S { float f; };
 
 int test_expr_struct_type_check(S p0, uint p1) {
   return __builtin_hlsl_dot4add_u8packed(p1, p1, p0);
-  // expected-error at -1 {{no viable conversion from 'S' to 'unsigned int'}}
+  // expected-error at -1 {{cannot initialize a parameter of type 'unsigned int' with an lvalue of type 'S'}}
 }
diff --git a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
index fbb47bd2e7d39..d180611263fe4 100644
--- a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
@@ -16,7 +16,7 @@ struct R {
 // Can't cast a union
 export void cantCast2() {
   R r = (R)1;
-  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'R'}}
+  // expected-error at -1 {{C-style cast from 'int' to 'R' is not allowed}}
 }
 
 RWBuffer<float4> Buf;
@@ -39,5 +39,5 @@ struct X {
 
 export void cantCast5() {
   X x = (X)1;
-  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'X'}}
+  // expected-error at -1 {{C-style cast from 'int' to 'X' is not allowed}}
 }
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
index d9f50e9b0307f..b8fb1b4596d2a 100644
--- a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
@@ -8,9 +8,6 @@ export void cantCast() {
 }
 
 struct R {
-// expected-note at -1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const R' for 1st argument}}
-// expected-note at -2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'R' for 1st argument}}
-// expected-note at -3 {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}}
   int A;
   union {
     float F;
@@ -21,10 +18,10 @@ struct R {
 export void cantCast4() {
   int2 A = {1,2};
   R r = R(A);
-  // expected-error at -1 {{no matching conversion for functional-style cast from 'int2' (aka 'vector<int, 2>') to 'R'}}
+  // expected-error at -1 {{functional-style cast from 'int2' (aka 'vector<int, 2>') to 'R' is not allowed}}
   R r2;
   r2.A = 1;
   r2.F = 2.0;
   int2 B = (int2)r2;
-  // expected-error at -1 {{cannot convert 'R' to 'int2' (aka 'vector<int, 2>') without a conversion operator}}
+  // expected-error at -1 {{C-style cast from 'R' to 'int2' (aka 'vector<int, 2>') is not allowed}}
 }
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl
index 8481cfc1b18e2..9d296a9798a9c 100644
--- a/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl
+++ b/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl
@@ -30,6 +30,7 @@ struct S {
 // cast from a struct
 // CHECK-LABEL: call3
 // CHECK: CStyleCastExpr {{.*}} 'int[2]' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'S' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue Var {{.*}} 'SS' 'S'
 export void call3() {
   S SS = {1,1.0};
diff --git a/clang/test/SemaHLSL/Language/InitListAST.hlsl b/clang/test/SemaHLSL/Language/InitListAST.hlsl
index 62acaf3046548..b3325b37a8939 100644
--- a/clang/test/SemaHLSL/Language/InitListAST.hlsl
+++ b/clang/test/SemaHLSL/Language/InitListAST.hlsl
@@ -1076,12 +1076,14 @@ float case17() {
 // CHECK-NEXT: OpaqueValueExpr [[OPV0:0x[0-9a-f]+]] {{.*}} 'TwoInts' xvalue
 // CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'TwoInts' xvalue
 // CHECK-NEXT: CStyleCastExpr {{.*}} 'TwoInts' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'TwoFloats' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr {{.*}} 'TwoFloats' lvalue Var {{.*}} 'TF' 'TwoFloats'
 // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
 // CHECK-NEXT: MemberExpr {{.*}} 'int' xvalue .W
 // CHECK-NEXT: OpaqueValueExpr [[OPV0]] {{.*}} 'TwoInts' xvalue
 // CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'TwoInts' xvalue
 // CHECK-NEXT: CStyleCastExpr {{.*}} 'TwoInts' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'TwoFloats' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr {{.*}} 'TwoFloats' lvalue Var {{.*}} 'TF' 'TwoFloats'
 int case18() {
   TwoFloats TF = {1.0,2.0};
@@ -1107,6 +1109,7 @@ int case18() {
 // CHECK-NEXT: OpaqueValueExpr [[OPV1:0x[0-9a-f]+]] {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: CStyleCastExpr {{.*}} 'int[4]' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'FourFloats' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr {{.*}} 'FourFloats' lvalue Var {{.*}} 'FF' 'FourFloats'
 // CHECK-NEXT: IntegerLiteral {{.*}} '__size_t':'unsigned long' 0
 // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
@@ -1115,6 +1118,7 @@ int case18() {
 // CHECK-NEXT: OpaqueValueExpr [[OPV1]] {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: CStyleCastExpr {{.*}} 'int[4]' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'FourFloats' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr {{.*}} 'FourFloats' lvalue Var {{.*}} 'FF' 'FourFloats'
 // CHECK-NEXT: IntegerLiteral {{.*}} '__size_t':'unsigned long' 1
 // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
@@ -1123,6 +1127,7 @@ int case18() {
 // CHECK-NEXT: OpaqueValueExpr [[OPV1]] {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: CStyleCastExpr {{.*}} 'int[4]' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'FourFloats' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr {{.*}} 'FourFloats' lvalue Var {{.*}} 'FF' 'FourFloats'
 // CHECK-NEXT: IntegerLiteral {{.*}} '__size_t':'unsigned long' 2
 // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
@@ -1131,6 +1136,7 @@ int case18() {
 // CHECK-NEXT: OpaqueValueExpr [[OPV1]] {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'int[4]' xvalue
 // CHECK-NEXT: CStyleCastExpr {{.*}} 'int[4]' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'FourFloats' <LValueToRValue> part_of_explicit_cast
 // CHECK-NEXT: DeclRefExpr {{.*}} 'FourFloats' lvalue Var {{.*}} 'FF' 'FourFloats'
 // CHECK-NEXT: IntegerLiteral {{.*}} '__size_t':'unsigned long' 3
 int case19() {
diff --git a/clang/test/SemaHLSL/Language/InitLists.hlsl b/clang/test/SemaHLSL/Language/InitLists.hlsl
index c31c0fde33f30..1ea9ddb2aff26 100644
--- a/clang/test/SemaHLSL/Language/InitLists.hlsl
+++ b/clang/test/SemaHLSL/Language/InitLists.hlsl
@@ -106,20 +106,14 @@ struct R {
   };
 };
 
-// expected-note@#anon{{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to}}
-// expected-note@#anon{{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to}}
-
 void Err2(RWBuffer<float4> B) {
   ContainsResource RS1 = {1, B};
-  ContainsResource RS2 = (1.xx); // expected-error{{no viable conversion from 'vector<int, 2>' (vector of 2 'int' values) to 'ContainsResource'}}
+  ContainsResource RS2 = (1.xx); // expected-error{{cannot initialize a variable of type 'ContainsResource' with an rvalue of type 'vector<int, 2>' (vector of 2 'int' values)}}
   ContainsResource RS3 = {B, 1}; // expected-error{{no viable conversion from 'RWBuffer<float4>' (aka 'RWBuffer<vector<float, 4>>') to 'int'}}
   ContainsResourceInverted IR = {RS1}; // expected-error{{no viable conversion from 'int' to 'hlsl::RWBuffer<vector<float, 4>>'}}
 
-  R r = {1,2}; // expected-error{{no viable conversion from 'int' to 'R::(anonymous union at}}
+  R r = {1,2}; // expected-error-re{{cannot initialize a parameter of type 'R::(anonymous union at {{.+}} with an rvalue of type 'int'}}
 }
 
-// expected-note@#ContainsResource{{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'vector<int, 2>' (vector of 2 'int' values) to 'const ContainsResource &' for 1st argument}}
-// expected-note@#ContainsResource{{candidate constructor (the implicit move constructor) not viable: no known conversion from 'vector<int, 2>' (vector of 2 'int' values) to 'ContainsResource &&' for 1st argument}}
-
 // This note refers to the RWBuffer copy constructor that do not have a source locations
 // expected-note@*{{candidate constructor not viable}}
diff --git a/clang/test/SemaHLSL/prohibit_pointer.hlsl b/clang/test/SemaHLSL/prohibit_pointer.hlsl
index 76c017150f9d5..84e1e10c45820 100644
--- a/clang/test/SemaHLSL/prohibit_pointer.hlsl
+++ b/clang/test/SemaHLSL/prohibit_pointer.hlsl
@@ -68,7 +68,7 @@ struct Fish {
 
   // expected-note@+1 {{'->' applied to return value of the operator->() declared here}}
   Fins operator ->() {
-    return Fins();
+    return Fins(); // expected-error {{no matching constructor for initialization of 'Fins'}}
   }
 };
 

>From 3691fbaafbfa780563e9c3bea0120fa8452af8e8 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 30 Apr 2026 09:47:21 -0700
Subject: [PATCH 02/21] Update test

---
 .../CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl  | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index 8d5ed58e58d74..b76c36de8b418 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -156,9 +156,12 @@ struct Derived : BFields {
 
 // flatten from a derived struct with bitfields
 // CHECK-LABEL: call8
-// CHECK: [[A:%.*]] = alloca [4 x i32], align 4
-// CHECK: [[Tmp:%.*]] = alloca %struct.Derived, align 1
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DIndirectAddr:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: store ptr %D, ptr [[DIndirectAddr]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 0
 // CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 1
 // CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 2

>From 000180a3753ebf1caf84a66f10bfb948202cee08 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 30 Apr 2026 10:07:54 -0700
Subject: [PATCH 03/21] Fix ObjCXX test failure

---
 clang/lib/Sema/SemaExpr.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cba1f4e9fc6ab..fdabed7472d34 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16006,9 +16006,6 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   if (!LHSExpr || !RHSExpr)
     return ExprError();
 
-  const Type *LHSTy = LHSExpr->getType().getTypePtr();
-  const Type *RHSTy = RHSExpr->getType().getTypePtr();
-
   // We want to end up calling one of SemaPseudoObject::checkAssignment
   // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if
   // both expressions are overloadable or either is type-dependent),
@@ -16016,7 +16013,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   // any placeholder types out of the way.
 
   // Handle pseudo-objects in the LHS.
-  if (const BuiltinType *pty = LHSTy->getAsPlaceholderType()) {
+  if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) {
     // Assignments with a pseudo-object l-value need special analysis.
     if (pty->getKind() == BuiltinType::PseudoObject &&
         BinaryOperator::isAssignmentOp(Opc))
@@ -16072,7 +16069,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
     if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) {
       if (getLangOpts().CPlusPlus &&
           (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
-           LHSTy->isOverloadableType()))
+           LHSExpr->getType()->isOverloadableType()))
         return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
 
       return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr,
@@ -16081,7 +16078,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
 
     // Don't resolve overloads if the other type is overloadable.
     if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload &&
-        LHSTy->isOverloadableType())
+        LHSExpr->getType()->isOverloadableType())
       return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
 
     ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr);
@@ -16090,7 +16087,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   }
 
   if (getLangOpts().HLSL &&
-      (LHSTy->isHLSLResourceRecord() || LHSTy->isHLSLResourceRecordArray())) {
+      (LHSExpr->getType()->isHLSLResourceRecord() || LHSExpr->getType()->isHLSLResourceRecordArray())) {
     if (!HLSL().CheckResourceBinOp(Opc, LHSExpr, RHSExpr, OpLoc))
       return ExprError();
   }
@@ -16100,6 +16097,8 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
     // or has an overloadable type.
     // In HLSL, user-defined structs/classes do not have ctors, dtors or
     // overloadable operators.
+    QualType LHSTy = LHSExpr->getType();
+    QualType RHSTy = RHSExpr->getType();
     bool IsLHSNonOverloadableHLSLType =
         getLangOpts().HLSL && LHSTy->isRecordType() &&
         !LHSTy->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers();

>From 5aa083e16a31572d07809ff7f546121e735a85ba Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 30 Apr 2026 11:02:29 -0700
Subject: [PATCH 04/21] clang-format

---
 clang/lib/Sema/SemaExpr.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index fdabed7472d34..1b796354c1532 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16086,8 +16086,8 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
     RHSExpr = resolvedRHS.get();
   }
 
-  if (getLangOpts().HLSL &&
-      (LHSExpr->getType()->isHLSLResourceRecord() || LHSExpr->getType()->isHLSLResourceRecordArray())) {
+  if (getLangOpts().HLSL && (LHSExpr->getType()->isHLSLResourceRecord() ||
+                             LHSExpr->getType()->isHLSLResourceRecordArray())) {
     if (!HLSL().CheckResourceBinOp(Opc, LHSExpr, RHSExpr, OpLoc))
       return ExprError();
   }

>From a18b5a926bfe07ad1cff5ddfb047801a6c2fbd05 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 30 Apr 2026 17:34:47 -0700
Subject: [PATCH 05/21] cleanup comments

---
 clang/lib/CodeGen/CGDecl.cpp    | 2 +-
 clang/lib/Sema/SemaExpr.cpp     | 2 +-
 clang/lib/Sema/SemaHLSL.cpp     | 4 ++--
 clang/lib/Sema/SemaOverload.cpp | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 28a14072982e8..c6b7ec7ee15c6 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1531,7 +1531,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
          ((Ty.isPODType(getContext()) || BaseTy->isObjCObjectPointerType() ||
            // If HLSL, check if it's a constant initializer anyway because
            // POD-ness will no longer be true for user defined structs
-           // (since they cannot have constructors or a destructor).
+           // (since they do not have constructors).
            (getLangOpts().HLSL && BaseTy->isRecordType())) &&
           D.getInit()->isConstantInitializer(getContext())))) {
 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 1b796354c1532..5c32697b5195a 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16095,7 +16095,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   if (getLangOpts().CPlusPlus) {
     // Otherwise, build an overloaded op if either expression is type-dependent
     // or has an overloadable type.
-    // In HLSL, user-defined structs/classes do not have ctors, dtors or
+    // In HLSL, user-defined structs/classes do not have ctors or
     // overloadable operators.
     QualType LHSTy = LHSExpr->getType();
     QualType RHSTy = RHSExpr->getType();
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 688bb2e39cea8..2a45ce415a435 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -5563,9 +5563,9 @@ bool SemaHLSL::ActOnUninitializedVarDecl(VarDecl *VD) {
   }
 
   // User-defined structs/classes do not have constructors.
-  // When declared at the a scope, they are part of the constant buffer
+  // When declared at a global scope, they are part of the constant buffer
   // and should not be initialized by the compiler.
-  // When declated at a local scope, they are  by default not initialized.
+  // When declared at a local scope, they are not initialized.
   // Also applies to arrays of user-defined structs/classes.
   const Type *Ty = VD->getType()->getUnqualifiedDesugaredType();
   while (Ty->isArrayType())
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index f93313392e3e7..cdbcc70fa74d7 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -15523,8 +15523,8 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
   // various built-in candidates, but as DR507 points out, this can lead to
   // problems. So we do it this way, which pretty much follows what GCC does.
   // Note that we go the traditional code path for compound assignment forms.
-  // In HLSL, user-defined structs/classes do not have ctors, dtors or
-  // overloadable operators, so we can take this shortcut too.
+  // In HLSL, user-defined structs/classes do not have ctors or overloadable
+  // operators, so we can take this shortcut too.
   const Type *LHSTy = Args[0]->getType().getTypePtr();
   if (Opc == BO_Assign &&
       (!LHSTy->isOverloadableType() ||

>From cacebdde4879156a4a2810f987cb469a6f03ae75 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Fri, 1 May 2026 12:21:20 -0700
Subject: [PATCH 06/21] Add test for cbuffer struct with array and embedded
 struct

---
 .../resources/cbuffer_struct_passing.hlsl     | 200 +++++++++++++-----
 1 file changed, 144 insertions(+), 56 deletions(-)

diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
index 2f7dc02c6d7f2..7858fbcf68543 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
@@ -9,15 +9,25 @@ struct P {
 };
 
 struct S : P {
-  float4 b;
+  double b;
+  float4 c;
 };
 
-// CHECK-DAG: %S = type <{ <3 x float>, target("[[PADDING_TYPE]]", 4), <4 x float> }>
+struct T {
+  S s;
+  int arr[2];
+};
+
+// CHECK-DAG: %__cblayout_CB = type <{ %S, %T }>
+// CHECK-DAG: %S = type <{ <3 x float>, target("[[PADDING_TYPE]]", 4), double, target("[[PADDING_TYPE]]", 8), <4 x float> }>
+// CHECK-DAG: %T = type <{ %S, <{ [1 x <{ i32, target("[[PADDING_TYPE]]", 12) }>], i32 }> }>
+// CHECK-DAG: %struct.S = type <{ %struct.P, double, <4 x float> }>
 // CHECK-DAG: %struct.P = type { <3 x float> }
-// CHECK-DAG: %struct.S = type { %struct.P, <4 x float> }
+// CHECK-DAG: %struct.T = type { %struct.S, [2 x i32] }
 
 cbuffer CB {
   S cbs;
+  T cbt;
 };
 // CHECK-DAG: @cbs = external hidden addrspace([[CONST_ADDR_SPACE]]) global %S, align 1
 
@@ -27,13 +37,19 @@ cbuffer CB {
 //
   // Copy S field by field into local variable in default address space.
 //
-// CHECK-NEXT: [[LocalS:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: [[PtrA:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 0
-// CHECK-NEXT: [[CBufLoad1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
-// CHECK-NEXT: store <3 x float> [[CBufLoad1]], ptr [[PtrA]], align 4
-// CHECK-NEXT: [[PtrB:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 1
-// CHECK-NEXT: [[CBufLoad2:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 4
-// CHECK-NEXT: store <4 x float> [[CBufLoad2]], ptr [[PtrB]], align 4
+// CHECK-NEXT: [[AggTemp:%.*]] = alloca %struct.S, align 1
+
+// CHECK-NEXT: [[Ptr_a:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
+// CHECK-NEXT: [[CBufLoad_a:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
+// CHECK-NEXT: store <3 x float> [[CBufLoad_a]], ptr [[Ptr_a]], align 4
+
+// CHECK-NEXT: [[Ptr_b:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
+// CHECK-NEXT: [[CBufLoad_b:%.*]] = load double, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 8
+// CHECK-NEXT: store double [[CBufLoad_b]], ptr [[Ptr_b]], align 8
+
+// CHECK-NEXT: [[Ptr_c:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 2
+// CHECK-NEXT: [[CBufLoad_c:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 32), align 4
+// CHECK-NEXT: store <4 x float> [[CBufLoad_c]], ptr [[Ptr_c]], align 4
 // CHECK-NEXT: ret void
 void case1() {
   S local = cbs;
@@ -47,56 +63,128 @@ void case1() {
 //
 // CHECK-NEXT: [[LocalS:%.*]] = alloca %struct.S, align 1
 // CHECK-NEXT: [[AggTemp:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: [[PtrA:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
-// CHECK-NEXT: [[CBufLoad1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
-// CHECK-NEXT: store <3 x float> [[CBufLoad1]], ptr [[PtrA]], align 4
-// CHECK-NEXT: [[PtrB:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
-// CHECK-NEXT: [[CBufLoad2:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 4
-// CHECK-NEXT: store <4 x float> [[CBufLoad2]], ptr [[PtrB]], align 4
+
+// CHECK-NEXT: [[Ptr_a:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
+// CHECK-NEXT: [[CBufLoad_a:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
+// CHECK-NEXT: store <3 x float> [[CBufLoad_a]], ptr [[Ptr_a]], align 4
+
+// CHECK-NEXT: [[Ptr_b:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
+// CHECK-NEXT: [[CBufLoad_b:%.*]] = load double, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 8
+// CHECK-NEXT: store double [[CBufLoad_b]], ptr [[Ptr_b]], align 8
+
+// CHECK-NEXT: [[Ptr_c:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 2
+// CHECK-NEXT: [[CBufLoad_c:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 32), align 4
+// CHECK-NEXT: store <4 x float> [[CBufLoad_c]], ptr [[Ptr_c]], align 4
 //
 // The proces HLSLElementwiseCast - copy individual vector elements between the structs.
 //
-// CHECK-NEXT: [[VecGep1:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 0
-// CHECK-NEXT: [[VecGep2:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 1
-// CHECK-NEXT: [[VecGep3:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
-// CHECK-NEXT: [[VecGep4:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
-
-// CHECK-NEXT: [[VecA1:%.*]] = load <3 x float>, ptr [[VecGep3]], align 4
-// CHECK-NEXT: [[Val1:%.*]] = extractelement <3 x float> [[VecA1]], i32 0
-// CHECK-NEXT: [[VecA1Ptr:%.*]] = getelementptr <3 x float>, ptr [[VecGep1]], i32 0, i32 0
-// CHECK-NEXT: store float [[Val1]], ptr [[VecA1Ptr]], align 4
-
-// CHECK-NEXT: [[VecA2:%.*]] = load <3 x float>, ptr [[VecGep3]], align 4
-// CHECK-NEXT: [[Val2:%.*]] = extractelement <3 x float> [[VecA2]], i32 1
-// CHECK-NEXT: [[VecA2Ptr:%.*]] = getelementptr <3 x float>, ptr [[VecGep1]], i32 0, i32 1
-// CHECK-NEXT: store float [[Val2]], ptr [[VecA2Ptr]], align 4
-
-// CHECK-NEXT: [[VecA3:%.*]] = load <3 x float>, ptr [[VecGep3]], align 4
-// CHECK-NEXT: [[Val3:%.*]] = extractelement <3 x float> [[VecA3]], i32 2
-// CHECK-NEXT: [[VecA3Ptr:%.*]] = getelementptr <3 x float>, ptr [[VecGep1]], i32 0, i32 2
-// CHECK-NEXT: store float [[Val3]], ptr [[VecA3Ptr]], align 4
-
-// CHECK-NEXT: [[VecB1:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
-// CHECK-NEXT: [[Val4:%.*]] = extractelement <4 x float> [[VecB1]], i32 0
-// CHECK-NEXT: [[VecB1Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 0
-// CHECK-NEXT: store float [[Val4]], ptr [[VecB1Ptr]], align 4
-
-// CHECK-NEXT: [[VecB2:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
-// CHECK-NEXT: [[Val5:%.*]] = extractelement <4 x float> [[VecB2]], i32 1
-// CHECK-NEXT: [[VecB2Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 1
-// CHECK-NEXT: store float [[Val5]], ptr [[VecB2Ptr]], align 4
-
-// CHECK-NEXT: [[VecB3:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
-// CHECK-NEXT: [[Val6:%.*]] = extractelement <4 x float> [[VecB3]], i32 2
-// CHECK-NEXT: [[VecB3Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 2
-// CHECK-NEXT: store float [[Val6]], ptr [[VecB3Ptr]], align 4
-
-// CHECK-NEXT: [[VecB4:%.*]] = load <4 x float>, ptr [[VecGep4]], align 4
-// CHECK-NEXT: [[Val7:%.*]] = extractelement <4 x float> [[VecB4]], i32 3
-// CHECK-NEXT: [[VecB4Ptr:%.*]] = getelementptr <4 x float>, ptr [[VecGep2]], i32 0, i32 3
-// CHECK-NEXT: store float [[Val7]], ptr [[VecB4Ptr]], align 4
+// CHECK-NEXT: [[Ptr_LocalS_a:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Ptr_LocalS_b:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 1
+// CHECK-NEXT: [[Ptr_LocalS_c:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 2
+// CHECK-NEXT: [[Ptr_AggTemp_a:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[Ptr_AggTemp_b:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
+// CHECK-NEXT: [[Ptr_AggTemp_c:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 2
+
+// CHECK-NEXT: [[AggTemp_a:%.*]] = load <3 x float>, ptr [[Ptr_AggTemp_a]], align 4
+// CHECK-NEXT: [[Val_a0:%.*]] = extractelement <3 x float> [[AggTemp_a]], i32 0
+// CHECK-NEXT: [[Ptr_LocalS_a0:%.*]] = getelementptr <3 x float>, ptr [[Ptr_LocalS_a]], i32 0, i32 0
+// CHECK-NEXT: store float [[Val_a0]], ptr [[Ptr_LocalS_a0]], align 4
+
+// CHECK-NEXT: [[AggTemp_a:%.*]] = load <3 x float>, ptr [[Ptr_AggTemp_a]], align 4
+// CHECK-NEXT: [[Val_a1:%.*]] = extractelement <3 x float> [[AggTemp_a]], i32 1
+// CHECK-NEXT: [[Ptr_LocalS_a1:%.*]] = getelementptr <3 x float>, ptr [[Ptr_LocalS_a]], i32 0, i32 1
+// CHECK-NEXT: store float [[Val_a1]], ptr [[Ptr_LocalS_a1]], align 4
+
+// CHECK-NEXT: [[AggTemp_a:%.*]] = load <3 x float>, ptr [[Ptr_AggTemp_a]], align 4
+// CHECK-NEXT: [[Val_a2:%.*]] = extractelement <3 x float> [[AggTemp_a]], i32 2
+// CHECK-NEXT: [[Ptr_LocalS_a2:%.*]] = getelementptr <3 x float>, ptr [[Ptr_LocalS_a]], i32 0, i32 2
+// CHECK-NEXT: store float [[Val_a2]], ptr [[Ptr_LocalS_a2]], align 4
+
+// CHECK-NEXT: [[Val_b:%.*]] = load double, ptr [[Ptr_AggTemp_b]], align 8
+// CHECK-NEXT: store double [[Val_b]], ptr [[Ptr_LocalS_b]], align 8
+
+// CHECK-NEXT: [[AggTemp_c:%.*]] = load <4 x float>, ptr [[Ptr_AggTemp_c]], align 4
+// CHECK-NEXT: [[Val_c0:%.*]] = extractelement <4 x float> [[AggTemp_c]], i32 0
+// CHECK-NEXT: [[Ptr_LocalS_c0:%.*]] = getelementptr <4 x float>, ptr [[Ptr_LocalS_c]], i32 0, i32 0
+// CHECK-NEXT: store float [[Val_c0]], ptr [[Ptr_LocalS_c0]], align 4
+
+// CHECK-NEXT: [[AggTemp_c:%.*]] = load <4 x float>, ptr [[Ptr_AggTemp_c]], align 4
+// CHECK-NEXT: [[Val_c1:%.*]] = extractelement <4 x float> [[AggTemp_c]], i32 1
+// CHECK-NEXT: [[Ptr_LocalS_c1:%.*]] = getelementptr <4 x float>, ptr [[Ptr_LocalS_c]], i32 0, i32 1
+// CHECK-NEXT: store float [[Val_c1]], ptr [[Ptr_LocalS_c1]], align 4
+
+// CHECK-NEXT: [[AggTemp_c:%.*]] = load <4 x float>, ptr [[Ptr_AggTemp_c]], align 4
+// CHECK-NEXT: [[Val_c2:%.*]] = extractelement <4 x float> [[AggTemp_c]], i32 2
+// CHECK-NEXT: [[Ptr_LocalS_c2:%.*]] = getelementptr <4 x float>, ptr [[Ptr_LocalS_c]], i32 0, i32 2
+// CHECK-NEXT: store float [[Val_c2]], ptr [[Ptr_LocalS_c2]], align 4
+
+// CHECK-NEXT: [[AggTemp_c:%.*]] = load <4 x float>, ptr [[Ptr_AggTemp_c]], align 4
+// CHECK-NEXT: [[Val_c3:%.*]] = extractelement <4 x float> [[AggTemp_c]], i32 3
+// CHECK-NEXT: [[Ptr_LocalS_c3:%.*]] = getelementptr <4 x float>, ptr [[Ptr_LocalS_c]], i32 0, i32 3
+// CHECK-NEXT: store float [[Val_c3]], ptr [[Ptr_LocalS_c3]], align 4
 
 // CHECK-NEXT: ret void
 void case2() {
-  S localS = (S)cbs;
+  S AggTemp = (S)cbs;
+}
+
+// CHECK-LABEL: case3
+// CHECK-NEXT: entry:
+// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+void case3() {
+
+// CHECK-NEXT: [[LocalT:%.*]] = alloca %struct.T, align 1
+// CHECK-NEXT: [[LocalTCopy:%.*]] = alloca %struct.T, align 1
+// CHECK-NEXT: [[AggTemp:%.*]] = alloca %struct.S, align 1
+// CHECK-NEXT: [[AggTempCopy:%.*]] = alloca %struct.S, align 1
+
+// Check that constant to default address space copies the struct field by field
+//
+// CHECK-NEXT: [[Ptr_s:%.*]] = getelementptr inbounds %struct.T, ptr [[LocalT]], i32 0, i32 0
+// CHECK-NEXT: [[Ptr_a:%.*]] = getelementptr inbounds %struct.S, ptr [[Ptr_s]], i32 0, i32 0
+// CHECK-NEXT: [[CbufLoad_a:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, align 4
+// CHECK-NEXT: store <3 x float> [[CbufLoad_a]], ptr [[Ptr_a]], align 4
+
+// CHECK-NEXT: [[Ptr_b:%.*]] = getelementptr inbounds %struct.S, ptr [[Ptr_s]], i32 0, i32 1
+// CHECK-NEXT: [[CbufLoad_c:%.*]] = load double, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 16), align 8
+// CHECK-NEXT: store double [[CbufLoad_c]], ptr [[Ptr_b]], align 8
+
+// CHECK-NEXT: [[Ptr_c:%.*]] = getelementptr inbounds %struct.S, ptr [[Ptr_s]], i32 0, i32 2
+// CHECK-NEXT: [[CbufLoad_b:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 32), align 4
+// CHECK-NEXT: store <4 x float> [[CbufLoad_b]], ptr [[Ptr_c]], align 4
+  
+// CHECK-NEXT: [[Ptr_arr:%.*]] = getelementptr inbounds %struct.T, ptr [[LocalT]], i32 0, i32 1
+// CHECK-NEXT: [[Ptr_arr0:%.*]] = getelementptr inbounds [2 x i32], ptr [[Ptr_arr]], i32 0, i32 0
+// CHECK-NEXT: [[CbufLoad_arr0:%.*]] = load i32, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 48), align 4
+// CHECK-NEXT: store i32 [[CbufLoad_arr0]], ptr [[Ptr_arr0]], align 4
+
+// CHECK-NEXT: [[Ptr_arr1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Ptr_arr]], i32 0, i32 1
+// CHECK-NEXT: [[CbufLoad_arr1:%.*]] = load i32, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 64), align 4
+// CHECK-NEXT: store i32 [[CbufLoad_arr1]], ptr [[Ptr_arr1]], align 4
+  T localT = cbt;
+
+// Check that default to default address space copy uses memcpy
+//
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.{{i32|i64}}(ptr align 1 [[LocalTCopy]], ptr align 1 [[LocalT]], {{i32|i64}} 44, i1 false)
+  T localTCopy = localT;
+
+// Check that constant to default address space copies the struct field by field
+//
+// CHECK-NEXT: [[Ptr_a1:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
+// CHECK-NEXT: [[CbufLoad_a1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, align 4
+// CHECK-NEXT: store <3 x float> [[CbufLoad_a1]], ptr [[Ptr_a1]], align 4
+// CHECK-NEXT: [[Ptr_b1:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
+// CHECK-NEXT: [[CbufLoad_b1:%.*]] = load double, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 16), align 8
+// CHECK-NEXT: store double [[CbufLoad_b1]], ptr [[Ptr_b1]], align 8
+// CHECK-NEXT: [[Ptr_c1:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 2
+// CHECK-NEXT: [[CbufLoad_c1:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 32), align 4
+// CHECK-NEXT: store <4 x float> [[CbufLoad_c1]], ptr [[Ptr_c1]], align 4
+  S AggTemp = cbt.s;
+
+// Check that default to default address space copy uses memcpy
+//
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.{{i32|i64}}(ptr align 1 [[AggTempCopy]], ptr align 1 [[AggTemp]], {{i32|i64}} 36, i1 false)
+  S AggTempCopy = AggTemp;
+
+// CHECK-NEXT: ret void
 }

>From b8ff4c1453c1c8021f0562d79880c35094660def Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Wed, 6 May 2026 10:56:56 -0700
Subject: [PATCH 07/21] code review feedback

---
 clang/include/clang/AST/DeclCXX.h   | 81 +++++++++++++++--------------
 clang/lib/CodeGen/CGHLSLRuntime.cpp | 12 +++--
 clang/lib/Sema/SemaOverload.cpp     |  4 +-
 3 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index fe2bc0dd628c3..2a73e56f2465f 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -768,9 +768,7 @@ class CXXRecordDecl : public RecordDecl {
   //
   /// Determines whether this class has any user provided special members.
   bool hasUserProvidedSpecialMembers() const {
-    return data().UserDeclaredSpecialMembers &
-               (SMF_MoveConstructor | SMF_MoveAssignment | SMF_Destructor |
-                SMF_CopyAssignment | SMF_CopyConstructor) ||
+    return data().UserDeclaredSpecialMembers & SMF_All ||
            data().UserDeclaredConstructor ||
            data().UserProvidedDefaultConstructor;
   }
@@ -780,19 +778,19 @@ class CXXRecordDecl : public RecordDecl {
   ///
   /// This value is used for lazy creation of default constructors.
   bool needsImplicitDefaultConstructor() const {
-    return ((!data().UserDeclaredConstructor &&
-             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
-             (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
-            // FIXME: Proposed fix to core wording issue: if a class inherits
-            // a default constructor and doesn't explicitly declare one, one
-            // is declared implicitly.
-            (data().HasInheritedDefaultConstructor &&
-             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
-           // In HLSL, only built-in records like resources classes can have
-           // constructors.
-           (!getLangOpts().HLSL ||
-            (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
-            hasUserProvidedSpecialMembers());
+    // In HLSL, only built-in records like resource classes can have
+    // constructors and overloadable operators.
+    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
+      return false;
+
+    return (!data().UserDeclaredConstructor &&
+            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
+            (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
+           // FIXME: Proposed fix to core wording issue: if a class inherits
+           // a default constructor and doesn't explicitly declare one, one
+           // is declared implicitly.
+           (data().HasInheritedDefaultConstructor &&
+            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor));
   }
 
   /// Determine whether this class has any user-declared constructors.
@@ -818,11 +816,12 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// constructor to be lazily declared.
   bool needsImplicitCopyConstructor() const {
-    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor) &&
-           // In HLSL, only built-in records like resources classes can have
-           // constructors.
-           (!getLangOpts().HLSL || isLambda() ||
-            hasUserProvidedSpecialMembers());
+    // In HLSL, only built-in records like resources classes can have
+    // constructors and overloadable operators.
+    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
+      return false;
+
+    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor);
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -915,14 +914,16 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class should get an implicit move
   /// constructor or if any existing special member function inhibits this.
   bool needsImplicitMoveConstructor() const {
+    // In HLSL, only built-in records like resources classes can have
+    // constructors and overloadable operators.
+    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
+      return false;
+
     return !(data().DeclaredSpecialMembers & SMF_MoveConstructor) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveAssignment() && !hasUserDeclaredDestructor() &&
-           // In HLSL, only built-in records like resources classes can have
-           // constructors.
-           (!getLangOpts().HLSL || isLambda() ||
-            hasUserProvidedSpecialMembers());
+           !hasUserDeclaredMoveAssignment() &&
+           !hasUserDeclaredDestructor();
   }
 
   /// Determine whether we need to eagerly declare a defaulted move
@@ -951,11 +952,12 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// assignment operator to be lazily declared.
   bool needsImplicitCopyAssignment() const {
-    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment) &&
-           // In HLSL, only built-in records like resources classes can have
-           // constructors.
-           (!getLangOpts().HLSL || isLambda() ||
-            hasUserProvidedSpecialMembers());
+    // In HLSL, only built-in records like resources classes can have
+    // constructors and overloadable operators.
+    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
+      return false;
+
+    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment);
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -1013,17 +1015,18 @@ class CXXRecordDecl : public RecordDecl {
   /// assignment operator or if any existing special member function inhibits
   /// this.
   bool needsImplicitMoveAssignment() const {
+    // In HLSL, only built-in records like resources classes can have
+    // constructors and overloadable operators.
+    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
+      return false;
+
     return !(data().DeclaredSpecialMembers & SMF_MoveAssignment) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveConstructor() && !hasUserDeclaredDestructor() &&
-           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable()) &&
-           // In HLSL, only built-in records like resources classes can have
-           // constructors.
-           (!getLangOpts().HLSL ||
-            (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
-            hasUserProvidedSpecialMembers());
-  }
+           !hasUserDeclaredMoveConstructor() &&
+           !hasUserDeclaredDestructor() &&
+           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable());
+ }
 
   /// Determine whether we need to eagerly declare a move assignment
   /// operator for this class.
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 575592dc61fec..538dda8ed0881 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -1079,10 +1079,14 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
             PD->getAttr<HLSLParamModifierAttr>()) {
       llvm_unreachable("Not handled yet");
     } else {
-      llvm::Type *ParamType = Param.hasByValAttr() ? Param.getParamByValType()
-                              : PD->getType()->isRecordType()
-                                  ? CGM.getTypes().ConvertType(PD->getType())
-                                  : Param.getType();
+      llvm::Type *ParamType = nullptr;
+      if (Param.hasByValAttr())
+        ParamType = Param.getParamByValType();
+      else if (PD->getType()->isRecordType())
+        ParamType = CGM.getTypes().ConvertType(PD->getType());
+      else
+        ParamType = Param.getType();
+      
       auto AttrBegin = PD->specific_attr_begin<HLSLAppliedSemanticAttr>();
       auto AttrEnd = PD->specific_attr_end<HLSLAppliedSemanticAttr>();
       auto Result =
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index cdbcc70fa74d7..5f19e7ae803e5 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -15523,8 +15523,8 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
   // various built-in candidates, but as DR507 points out, this can lead to
   // problems. So we do it this way, which pretty much follows what GCC does.
   // Note that we go the traditional code path for compound assignment forms.
-  // In HLSL, user-defined structs/classes do not have  or overloadable
-  // operators, so we can take this shortcut too.
+  // In HLSL, user-defined structs/classes do not have constructors or
+  // overloadable operators, so we can take this shortcut too.
   const Type *LHSTy = Args[0]->getType().getTypePtr();
   if (Opc == BO_Assign &&
       (!LHSTy->isOverloadableType() ||

>From e46f0e9abcd6588eaadaa1f585a6450b357ba7c7 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Fri, 15 May 2026 13:24:50 -0700
Subject: [PATCH 08/21] Handle assignment from derived to base class. Add test
 for this, plus AST struct tests.

---
 clang/lib/CodeGen/CGExprAgg.cpp               |  35 ++++-
 clang/lib/Sema/SemaExpr.cpp                   |  14 ++
 clang/test/AST/HLSL/StructPassing-AST.hlsl    | 128 ++++++++++++++++++
 .../BasicFeatures/StructPassing.hlsl          | 113 ++++++++++++++++
 .../resources/cbuffer_struct_passing.hlsl     |  67 +++++++--
 5 files changed, 347 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/AST/HLSL/StructPassing-AST.hlsl
 create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl

diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index a4282c4f51199..b4e9f9a567621 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -868,7 +868,40 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
     break;
   }
 
-  case CK_DerivedToBase:
+  case CK_DerivedToBase: {
+    assert(CGF.getLangOpts().HLSL &&
+           "Derived/Base casts in EmitAggExpr are only supported in HLSL");
+
+    // Create a temporary for the derived record, switch it out with the current
+    // Dest slot, and emit the derived value.
+    QualType DerivedTy = E->getSubExpr()->getType();
+    AggValueSlot DerivedTmpSlot = CGF.CreateAggTemp(DerivedTy, "tmp");
+
+    AggValueSlot DestBaseSlot = Dest;
+    Dest = DerivedTmpSlot;
+
+    Visit(E->getSubExpr());
+
+    // Perform derived-to-base address conversion to get the address
+    // of the base record within the derived record. In HLSL this should
+    // always be same as the derived because of single inheritance, but let's
+    // do it properly.
+    Address BaseAddrInDerived = CGF.GetAddressOfBaseClass(
+        DerivedTmpSlot.getAddress(), DerivedTy->castAsCXXRecordDecl(),
+        E->path_begin(), E->path_end(),
+        /*NullCheckValue=*/false, E->getExprLoc());
+
+    AggValueSlot SrcBaseSlot = AggValueSlot::forAddr(
+        BaseAddrInDerived, E->getType().getQualifiers(),
+        AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers,
+        AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap);
+
+    // Copy the base class to the original destination slot and restore it.
+    EmitCopy(E->getType(), DestBaseSlot, SrcBaseSlot);
+    Dest = DestBaseSlot;
+    break;
+  }
+
   case CK_BaseToDerived:
   case CK_UncheckedDerivedToBase: {
     llvm_unreachable("cannot perform hierarchy conversion in EmitAggExpr: "
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5c32697b5195a..b72406f169909 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10193,6 +10193,20 @@ AssignConvertType Sema::CheckSingleAssignmentConstraints(QualType LHSType,
       return AssignConvertType::Incompatible;
   }
 
+  // For HLSL records, insert derived-to-base conversion if needed.
+  if (getLangOpts().HLSL && LHSType->isRecordType()) {
+    QualType RHSType = RHS.get()->getType();
+    if (!Context.hasSameUnqualifiedType(RHSType, LHSType)) {
+      CXXBasePaths Paths;
+      if (IsDerivedFrom(RHS.get()->getBeginLoc(), RHSType, LHSType, Paths)) {
+        CXXCastPath CastPath;
+        BuildBasePathArray(Paths, CastPath);
+        RHS = ImpCastExprToType(RHS.get(), LHSType, CK_DerivedToBase, VK_LValue,
+                                &CastPath);
+      }
+    }
+  }
+
   // This check seems unnatural, however it is necessary to ensure the proper
   // conversion of functions/arrays. If the conversion were done for all
   // DeclExpr's (created by ActOnIdExpression), it would mess up the unary
diff --git a/clang/test/AST/HLSL/StructPassing-AST.hlsl b/clang/test/AST/HLSL/StructPassing-AST.hlsl
new file mode 100644
index 0000000000000..8b6b4bbe9a824
--- /dev/null
+++ b/clang/test/AST/HLSL/StructPassing-AST.hlsl
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -ast-dump -o - %s | FileCheck %s
+
+// CHECK: CXXRecordDecl {{.*}} struct P definition
+// CHECK-NEXT: DefinitionData aggregate standard_layout trivially_copyable pod literal can_const_default_init
+// CHECK-NEXT: DefaultConstructor
+// CHECK-NEXT: CopyConstructor simple trivial implicit_has_const_param
+// CHECK-NEXT: MoveConstructor
+// CHECK-NEXT: CopyAssignment simple trivial implicit_has_const_param
+// CHECK-NEXT: MoveAssignment
+// CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit
+// CHECK-NEXT: PackedAttr
+// CHECK-NEXT: CXXRecordDecl {{.*}} struct P
+// CHECK-NEXT: FieldDecl {{.*}} a 'float'
+// CHECK-NOT: CXXConstructorDecl
+// CHECK-NOT: CXXMethodDecl {{.*}} operator=
+struct P {
+  float a;
+};
+
+// CHECK: CXXRecordDecl {{.*}} struct S definition
+// CHECK-NEXT: DefinitionData aggregate trivially_copyable literal can_const_default_init
+// CHECK-NEXT: DefaultConstructor
+// CHECK-NEXT: CopyConstructor simple trivial
+// CHECK-NEXT: MoveConstructor needs_overload_resolution
+// CHECK-NEXT: CopyAssignment simple trivial
+// CHECK-NEXT: MoveAssignment needs_overload_resolution
+// CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit
+// CHECK-NEXT: public 'P'
+// CHECK-NEXT: PackedAttr
+// CHECK-NEXT: CXXRecordDecl {{.*}} implicit struct S
+// CHECK-NEXT: FieldDecl {{.*}} b 'double'
+// CHECK-NEXT: FieldDecl {{.*}} c 'int[2]'
+// CHECK-NOT: CXXConstructorDecl
+// CHECK-NOT: CXXMethodDecl {{.*}} operator=
+struct S : P {
+  double b;
+  int c[2];
+};
+
+// CHECK: FunctionDecl {{.*}} case1 'void (S)'
+// CHECK-NEXT: ParmVarDecl {{.*}} s 'S'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: DeclStmt
+// CHECK-NEXT: VarDecl {{.*}} sLocal 'S'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'S' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue ParmVar {{.*}} 's' 'S'
+void case1(S s) {
+  // struct initialization
+  S sLocal = s;
+}
+
+// CHECK: FunctionDecl {{.*}} case2 'void (S)'
+// CHECK-NEXT: ParmVarDecl {{.*}} s 'S'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: DeclStmt
+// CHECK-NEXT: VarDecl {{.*}} sLocal 'S'
+// CHECK-NEXT: BinaryOperator {{.*}} 'S' lvalue '='
+// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue Var {{.*}} 'sLocal' 'S'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'S' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue ParmVar {{.*}} 's' 'S' 
+void case2(S s) {
+  S sLocal;
+  // struct assignment
+  sLocal = s;
+}
+
+void useS(S s) {}
+
+// CHECK: FunctionDecl {{.*}} case3 'void (S)'
+// CHECK-NEXT: ParmVarDecl {{.*}} used s 'S'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(S)' <FunctionToPointerDecay>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'void (S)' lvalue Function {{.*}} 'useS' 'void (S)'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'S' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue ParmVar {{.*}} 's' 'S'
+void case3(S s) {
+  // struct argument passing
+  useS(s);
+}
+
+// CHECK: FunctionDecl {{.*}} case4 'void (S)'
+// CHECK-NEXT: ParmVarDecl {{.*}} used s 'S'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: DeclStmt
+// CHECK-NEXT: VarDecl {{.*}} pLocal 'P'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'P' <DerivedToBase (P)>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'S' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue ParmVar {{.*}} 's' 'S'
+void case4(S s) {
+  // derived to base conversion in initialization
+  P pLocal = s;
+}
+
+// CHECK: FunctionDecl {{.*}} case5 'void (S)'
+// CHECK-NEXT: ParmVarDecl {{.*}} used s 'S'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: DeclStmt
+// CHECK-NEXT: VarDecl {{.*}} pLocal 'P'
+// CHECK-NEXT: BinaryOperator {{.*}} 'P' lvalue '='
+// CHECK-NEXT: DeclRefExpr {{.*}} 'P' lvalue Var {{.*}} 'pLocal' 'P'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'P' <LValueToRValue>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'P' lvalue <DerivedToBase (P)>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue ParmVar {{.*}} 's' 'S'
+void case5(S s) {
+  P pLocal;
+  // derived to base conversion in assignment
+  pLocal = s;
+}
+
+void useP(P p) {}
+
+// CHECK: FunctionDecl {{.*}} case6 'void (S)'
+// CHECK-NEXT: ParmVarDecl {{.*}} used s 'S'
+// CHECK-NEXT: CompoundStmt
+// CHECK-NEXT: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(P)' <FunctionToPointerDecay>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'void (P)' lvalue Function {{.*}} 'useP' 'void (P)'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'P' <DerivedToBase (P)>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'S' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue ParmVar {{.*}} 's' 'S'
+void case6(S s) {
+  // derived to base conversion in argument passing
+  useP(s);
+}
+
+// CHECK-NOT: CXXConstructExpr
+// CHECK-NOT: CXXOperatorCallExpr
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl
new file mode 100644
index 0000000000000..5c6940c4d2e4d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl
@@ -0,0 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+struct P {
+  float a;
+};
+
+struct S : P {
+  double b;
+  int c[2];
+};
+
+// CHECK-LABEL: define hidden void @_Z5case11S(
+// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[SLOCAL:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[SLOCAL]], ptr align 1 [[S]], i32 20, i1 false)
+// CHECK-NEXT:    ret void
+//
+void case1(S s) {
+  // struct initialization
+  S sLocal = s;
+}
+
+// CHECK-LABEL: define hidden void @_Z5case21S(
+// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[SLOCAL:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[SLOCAL]], ptr align 1 [[S]], i32 20, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SLOCAL]], i32 20, i1 false)
+// CHECK-NEXT:    ret void
+//
+void case2(S s) {
+  S sLocal;
+  // struct assignment
+  sLocal = s;
+}
+
+void useS(S s) {}
+
+// CHECK-LABEL: define hidden void @_Z5case31S(
+// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[BYVAL_TEMP]], ptr align 1 [[S]], i32 20, i1 false)
+// CHECK-NEXT:    call void @_Z4useS1S(ptr noundef dead_on_return [[BYVAL_TEMP]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+void case3(S s) {
+  // struct argument passing
+  useS(s);
+}
+
+// CHECK-LABEL: define hidden void @_Z5case41S(
+// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[PLOCAL:%.*]] = alloca [[STRUCT_P:%.*]], align 1
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[S]], i32 20, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[PLOCAL]], ptr align 1 [[TMP]], i32 4, i1 false)
+// CHECK-NEXT:    ret void
+//
+void case4(S s) {
+  // derived to base conversion in initialization
+  P pLocal = s;
+}
+
+// CHECK-LABEL: define hidden void @_Z5case51S(
+// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_P:%.*]], align 1
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_P]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP1]], ptr align 1 [[S]], i32 20, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[P]], ptr align 1 [[TMP1]], i32 4, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[P]], i32 4, i1 false)
+// CHECK-NEXT:    ret void
+//
+void case5(S s) {
+  P p;
+  // derived to base conversion in assignment
+  p = s;
+}
+
+void useP(P p) {}
+
+// CHECK-LABEL: define hidden void @_Z5case61S(
+// CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_P:%.*]], align 1
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[S]], i32 20, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TMP]], ptr align 1 [[TMP]], i32 4, i1 false)
+// CHECK-NEXT:    call void @_Z4useP1P(ptr noundef dead_on_return [[AGG_TMP]]) #[[ATTR2]]
+// CHECK-NEXT:    ret void
+//
+void case6(S s) {
+  // derived to base conversion in argument passing
+  useP(s);
+}
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
index 7858fbcf68543..d1eb2f740dbf2 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
@@ -125,7 +125,7 @@ void case1() {
 
 // CHECK-NEXT: ret void
 void case2() {
-  S AggTemp = (S)cbs;
+  S LocalS = (S)cbs;
 }
 
 // CHECK-LABEL: case3
@@ -135,8 +135,8 @@ void case3() {
 
 // CHECK-NEXT: [[LocalT:%.*]] = alloca %struct.T, align 1
 // CHECK-NEXT: [[LocalTCopy:%.*]] = alloca %struct.T, align 1
-// CHECK-NEXT: [[AggTemp:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: [[AggTempCopy:%.*]] = alloca %struct.S, align 1
+// CHECK-NEXT: [[LocalS:%.*]] = alloca %struct.S, align 1
+// CHECK-NEXT: [[LocalSCopy:%.*]] = alloca %struct.S, align 1
 
 // Check that constant to default address space copies the struct field by field
 //
@@ -170,21 +170,70 @@ void case3() {
 
 // Check that constant to default address space copies the struct field by field
 //
-// CHECK-NEXT: [[Ptr_a1:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 0
+// CHECK-NEXT: [[Ptr_a1:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 0
 // CHECK-NEXT: [[CbufLoad_a1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, align 4
 // CHECK-NEXT: store <3 x float> [[CbufLoad_a1]], ptr [[Ptr_a1]], align 4
-// CHECK-NEXT: [[Ptr_b1:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 1
+// CHECK-NEXT: [[Ptr_b1:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 1
 // CHECK-NEXT: [[CbufLoad_b1:%.*]] = load double, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 16), align 8
 // CHECK-NEXT: store double [[CbufLoad_b1]], ptr [[Ptr_b1]], align 8
-// CHECK-NEXT: [[Ptr_c1:%.*]] = getelementptr inbounds %struct.S, ptr [[AggTemp]], i32 0, i32 2
+// CHECK-NEXT: [[Ptr_c1:%.*]] = getelementptr inbounds %struct.S, ptr [[LocalS]], i32 0, i32 2
 // CHECK-NEXT: [[CbufLoad_c1:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbt, {{i32|i64}} 32), align 4
 // CHECK-NEXT: store <4 x float> [[CbufLoad_c1]], ptr [[Ptr_c1]], align 4
-  S AggTemp = cbt.s;
+  S localS = cbt.s;
 
 // Check that default to default address space copy uses memcpy
 //
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.{{i32|i64}}(ptr align 1 [[AggTempCopy]], ptr align 1 [[AggTemp]], {{i32|i64}} 36, i1 false)
-  S AggTempCopy = AggTemp;
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.{{i32|i64}}(ptr align 1 [[LocalSCopy]], ptr align 1 [[LocalS]], {{i32|i64}} 36, i1 false)
+  S localSCopy = localS;
+
+// CHECK-NEXT: ret void
+}
+
+// CHECK-LABEL: case4
+// CHECK-NEXT: entry:
+// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+void case4() {
+
+// CHECK-NEXT: [[LocalP1:%.*]] = alloca %struct.P, align 1
+// CHECK-NEXT: [[Tmp0:%.*]] = alloca %struct.S, align 1
+// CHECK-NEXT: [[LocalP2:%.*]] = alloca %struct.P, align 1
+// CHECK-NEXT: [[Tmp1:%.*]] = alloca %struct.P, align 1
+// CHECK-NEXT: [[Tmp2:%.*]] = alloca %struct.S, align 1
+
+// CHECK-NEXT: [[Tmp0Ptr_a1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp0]], i32 0, i32 0
+// CHECK-NEXT: [[CbufLoad_a1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
+// CHECK-NEXT: store <3 x float> [[CbufLoad_a1]], ptr [[Tmp0Ptr_a1]], align 4
+
+// CHECK-NEXT: [[Tmp0Ptr_b1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp0]], i32 0, i32 1
+// CHECK-NEXT: [[CbufLoad_b1:%.*]] = load double, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 8
+// CHECK-NEXT: store double [[CbufLoad_b1]], ptr [[Tmp0Ptr_b1]], align 8
+
+// CHECK-NEXT: [[Tmp0Ptr_c1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp0]], i32 0, i32 2
+// CHECK-NEXT: [[CbufLoad_c1:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 32), align 4
+// CHECK-NEXT: store <4 x float> [[CbufLoad_c1]], ptr [[Tmp0Ptr_c1]], align 4
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.{{i32|i64}}(ptr align 1 [[LocalP1]], ptr align 1 [[Tmp0]], {{i32|i64}} 12, i1 false)
+
+  // Derived to base conversion in initialization. Size of S in memory layout is 36 bytes and
+  // size of P is 12 bytes. The memcpy should only copy the 12 bytes of P.
+  P LocalP1 = cbs;
+
+// CHECK-NEXT: [[Tmp2Ptr_a1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp2]], i32 0, i32 0
+// CHECK-NEXT: [[CbufLoad_a1:%.*]] = load <3 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, align 4
+// CHECK-NEXT: store <3 x float> [[CbufLoad_a1]], ptr [[Tmp2Ptr_a1]], align 4
+// CHECK-NEXT: [[Tmp2Ptr_b1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp2]], i32 0, i32 1
+// CHECK-NEXT: [[CbufLoad_b1:%.*]] = load double, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 16), align 8
+// CHECK-NEXT: store double [[CbufLoad_b1]], ptr [[Tmp2Ptr_b1]], align 8
+// CHECK-NEXT: [[Tmp2Ptr_c1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp2]], i32 0, i32 2
+// CHECK-NEXT: [[CbufLoad_c1:%.*]] = load <4 x float>, ptr addrspace([[CONST_ADDR_SPACE]]) getelementptr inbounds nuw (i8, ptr addrspace([[CONST_ADDR_SPACE]]) @cbs, {{i32|i64}} 32), align 4
+// CHECK-NEXT: store <4 x float> [[CbufLoad_c1]], ptr [[Tmp2Ptr_c1]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.{{i32|i64}}(ptr align 1 [[LocalP2]], ptr align 1 [[Tmp2]], {{i32|i64}} 12, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.{{i32|i64}}(ptr align 1 [[Tmp1]], ptr align 1 [[LocalP2]], {{i32|i64}} 12, i1 false)
+
+  // Derived to base conversion in assignment. Size of S in memory layout is 36 bytes and
+  // size of P is 12 bytes. The memcpy should only copy the 12 bytes of P.
+  P LocalP2;
+  LocalP2 = cbs;
 
 // CHECK-NEXT: ret void
 }

>From 13dc39199b3f00e01ade4b4a00523c50ce14b564 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Fri, 15 May 2026 13:27:37 -0700
Subject: [PATCH 09/21] fix typos in comments

---
 clang/include/clang/AST/DeclCXX.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 2a73e56f2465f..dd993187f9244 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -764,7 +764,7 @@ class CXXRecordDecl : public RecordDecl {
   // have ctors, dtors, or overloaded operators, while implicit built-in
   // HLSL records such as resource classes can. It would be nice to use the
   // isImplicit() methods to determine that, but this flag is not propagated
-  // to template-instanticated classes.
+  // to template-instantiated classes.
   //
   /// Determines whether this class has any user provided special members.
   bool hasUserProvidedSpecialMembers() const {
@@ -778,7 +778,7 @@ class CXXRecordDecl : public RecordDecl {
   ///
   /// This value is used for lazy creation of default constructors.
   bool needsImplicitDefaultConstructor() const {
-    // In HLSL, only built-in records like resources classes can have
+    // In HLSL, only built-in records like resource classes can have
     // constructors and overloadable operators.
     if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
       return false;
@@ -816,7 +816,7 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// constructor to be lazily declared.
   bool needsImplicitCopyConstructor() const {
-    // In HLSL, only built-in records like resources classes can have
+    // In HLSL, only built-in records like resource classes can have
     // constructors and overloadable operators.
     if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
       return false;
@@ -914,7 +914,7 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class should get an implicit move
   /// constructor or if any existing special member function inhibits this.
   bool needsImplicitMoveConstructor() const {
-    // In HLSL, only built-in records like resources classes can have
+    // In HLSL, only built-in records like resource classes can have
     // constructors and overloadable operators.
     if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
       return false;
@@ -952,7 +952,7 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// assignment operator to be lazily declared.
   bool needsImplicitCopyAssignment() const {
-    // In HLSL, only built-in records like resources classes can have
+    // In HLSL, only built-in records like resource classes can have
     // constructors and overloadable operators.
     if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
       return false;
@@ -1015,7 +1015,7 @@ class CXXRecordDecl : public RecordDecl {
   /// assignment operator or if any existing special member function inhibits
   /// this.
   bool needsImplicitMoveAssignment() const {
-    // In HLSL, only built-in records like resources classes can have
+    // In HLSL, only built-in records like resource classes can have
     // constructors and overloadable operators.
     if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
       return false;

>From 12cf8f4de5ae17aa8420aa4138037e9e8542b41a Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Fri, 15 May 2026 13:40:33 -0700
Subject: [PATCH 10/21] Update AST test to check that default and move
 constructors and move assignment do not exist.

---
 clang/test/AST/HLSL/StructPassing-AST.hlsl | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/clang/test/AST/HLSL/StructPassing-AST.hlsl b/clang/test/AST/HLSL/StructPassing-AST.hlsl
index 8b6b4bbe9a824..8ae83fa89b390 100644
--- a/clang/test/AST/HLSL/StructPassing-AST.hlsl
+++ b/clang/test/AST/HLSL/StructPassing-AST.hlsl
@@ -2,13 +2,10 @@
 
 // CHECK: CXXRecordDecl {{.*}} struct P definition
 // CHECK-NEXT: DefinitionData aggregate standard_layout trivially_copyable pod literal can_const_default_init
-// CHECK-NEXT: DefaultConstructor
-// CHECK-NEXT: CopyConstructor simple trivial implicit_has_const_param
-// CHECK-NEXT: MoveConstructor
-// CHECK-NEXT: CopyAssignment simple trivial implicit_has_const_param
-// CHECK-NEXT: MoveAssignment
-// CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit
-// CHECK-NEXT: PackedAttr
+// CHECK-NOT: DefaultConstructor {{.*}} exists
+// CHECK-NOT: MoveConstructor {{.*}} exists
+// CHECK-NOT: MoveAssignment {{.*}} exists
+// CHECK: PackedAttr
 // CHECK-NEXT: CXXRecordDecl {{.*}} struct P
 // CHECK-NEXT: FieldDecl {{.*}} a 'float'
 // CHECK-NOT: CXXConstructorDecl
@@ -19,13 +16,10 @@ struct P {
 
 // CHECK: CXXRecordDecl {{.*}} struct S definition
 // CHECK-NEXT: DefinitionData aggregate trivially_copyable literal can_const_default_init
-// CHECK-NEXT: DefaultConstructor
-// CHECK-NEXT: CopyConstructor simple trivial
-// CHECK-NEXT: MoveConstructor needs_overload_resolution
-// CHECK-NEXT: CopyAssignment simple trivial
-// CHECK-NEXT: MoveAssignment needs_overload_resolution
-// CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit
-// CHECK-NEXT: public 'P'
+// CHECK-NOT: DefaultConstructor {{.*}} exists
+// CHECK-NOT: MoveConstructor {{.*}} exists
+// CHECK-NOT: MoveAssignment {{.*}} exists
+// CHECK: public 'P'
 // CHECK-NEXT: PackedAttr
 // CHECK-NEXT: CXXRecordDecl {{.*}} implicit struct S
 // CHECK-NEXT: FieldDecl {{.*}} b 'double'

>From 60ad20aa0ea27b2af3d2ecd109c47fff8bbf2a21 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Fri, 15 May 2026 14:09:05 -0700
Subject: [PATCH 11/21] clang-format

---
 clang/include/clang/AST/DeclCXX.h   | 2 +-
 clang/lib/CodeGen/CGHLSLRuntime.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 9cdc0613dbdab..36cd8f519ea34 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -1026,7 +1026,7 @@ class CXXRecordDecl : public RecordDecl {
            !hasUserDeclaredMoveConstructor() &&
            !hasUserDeclaredDestructor() &&
            (!isLambda() || lambdaIsDefaultConstructibleAndAssignable());
- }
+  }
 
   /// Determine whether we need to eagerly declare a move assignment
   /// operator for this class.
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 538dda8ed0881..555835a7966fc 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -1086,7 +1086,7 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
         ParamType = CGM.getTypes().ConvertType(PD->getType());
       else
         ParamType = Param.getType();
-      
+
       auto AttrBegin = PD->specific_attr_begin<HLSLAppliedSemanticAttr>();
       auto AttrEnd = PD->specific_attr_end<HLSLAppliedSemanticAttr>();
       auto Result =

>From 0a34df32a483bd43c234e9afe6798156fbcda9ca Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Fri, 15 May 2026 18:35:59 -0700
Subject: [PATCH 12/21] Update tests after merge

---
 clang/test/AST/HLSL/ConstantBuffers-AST-error.hlsl             | 2 +-
 clang/test/CodeGenHLSL/cbuffer_copy_layout.hlsl                | 1 -
 .../CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl    | 3 ++-
 clang/test/SemaHLSL/Resources/ConstantBuffers.hlsl             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/test/AST/HLSL/ConstantBuffers-AST-error.hlsl b/clang/test/AST/HLSL/ConstantBuffers-AST-error.hlsl
index 3e2d8075a6569..4f9d60c741f90 100644
--- a/clang/test/AST/HLSL/ConstantBuffers-AST-error.hlsl
+++ b/clang/test/AST/HLSL/ConstantBuffers-AST-error.hlsl
@@ -19,6 +19,6 @@ void main() {
   // CHECK: error: no viable constructor copying variable of type 'const hlsl_constant S'
   S s2 = cb;
 
-  // CHECK: error: no viable conversion from 'ConstantBuffer<S>' to 'const S'
+  // CHECK: error: assigning to 'S' from incompatible type 'ConstantBuffer<S>'
   s = cb;
 }
diff --git a/clang/test/CodeGenHLSL/cbuffer_copy_layout.hlsl b/clang/test/CodeGenHLSL/cbuffer_copy_layout.hlsl
index ed1fe3ac0014e..022844284f4ba 100644
--- a/clang/test/CodeGenHLSL/cbuffer_copy_layout.hlsl
+++ b/clang/test/CodeGenHLSL/cbuffer_copy_layout.hlsl
@@ -16,7 +16,6 @@ ConstantBuffer<S> cb;
 
 [numthreads(1,1,1)]
 void main() {
-  // CHECK: error: no matching constructor for initialization of 'S'
   S l1 = s_cb;
 
   // CHECK: error: no viable constructor copying variable of type 'const hlsl_constant S'
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
index 4e1c1b7b55984..9065d469e5522 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
@@ -19,6 +19,7 @@ void main(unsigned GI : SV_GroupIndex) {
   // CHECK: define void @main()
 
   // CHECK: %[[TMP:.*]] = alloca %struct.S, align 1
+  // SPV: %[[TMPCAST:.*]] = addrspacecast ptr %[[TMP]] to ptr addrspace(11)
 
   // DXIL: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_0_0t.i32(target("dx.RawBuffer", i32, 0, 0) %{{.*}}, i32 %{{.*}})
   // SPV: %[[INPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_0t.i32(target("spirv.VulkanBuffer", [0 x i32], 12, 0) %{{.*}}, i32 %{{.*}})
@@ -39,7 +40,7 @@ void main(unsigned GI : SV_GroupIndex) {
   // SPV: %[[OUTPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
   // SPV: %[[INPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
   // SPV: call void @llvm.memcpy.p11.p11.i64(ptr addrspace(11) align 1 %[[OUTPTR]], ptr addrspace(11) align 1 %[[INPTR]], i64 4, i1 false)
-  // SPV: call void @llvm.memcpy.p0.p11.i64(ptr align 1 %[[TMP]], ptr addrspace(11) align 1 %[[OUTPTR]], i64 4, i1 false)
+  // SPV: call void @llvm.memcpy.p11.p11.i64(ptr addrspace(11) align 1 %[[TMPCAST]], ptr addrspace(11) align 1 %[[OUTPTR]], i64 4, i1 false)
 
   // DXIL: %[[OUTPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
   // DXIL: %[[INPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
diff --git a/clang/test/SemaHLSL/Resources/ConstantBuffers.hlsl b/clang/test/SemaHLSL/Resources/ConstantBuffers.hlsl
index 0ef3ada50c988..6dc566a58b5e4 100644
--- a/clang/test/SemaHLSL/Resources/ConstantBuffers.hlsl
+++ b/clang/test/SemaHLSL/Resources/ConstantBuffers.hlsl
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -finclude-default-header -fsyntax-only -verify %s
 
-struct S { // expected-note 3 {{candidate constructor}}
+struct S {
   float a;
   int b;
 };

>From de727fbdd9dbed318d784555db999b6fcb26191d Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Mon, 18 May 2026 16:03:48 -0700
Subject: [PATCH 13/21] Update test after merge

---
 .../CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl    | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
index 9065d469e5522..4e1c1b7b55984 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
@@ -19,7 +19,6 @@ void main(unsigned GI : SV_GroupIndex) {
   // CHECK: define void @main()
 
   // CHECK: %[[TMP:.*]] = alloca %struct.S, align 1
-  // SPV: %[[TMPCAST:.*]] = addrspacecast ptr %[[TMP]] to ptr addrspace(11)
 
   // DXIL: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_0_0t.i32(target("dx.RawBuffer", i32, 0, 0) %{{.*}}, i32 %{{.*}})
   // SPV: %[[INPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_0t.i32(target("spirv.VulkanBuffer", [0 x i32], 12, 0) %{{.*}}, i32 %{{.*}})
@@ -40,7 +39,7 @@ void main(unsigned GI : SV_GroupIndex) {
   // SPV: %[[OUTPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
   // SPV: %[[INPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
   // SPV: call void @llvm.memcpy.p11.p11.i64(ptr addrspace(11) align 1 %[[OUTPTR]], ptr addrspace(11) align 1 %[[INPTR]], i64 4, i1 false)
-  // SPV: call void @llvm.memcpy.p11.p11.i64(ptr addrspace(11) align 1 %[[TMPCAST]], ptr addrspace(11) align 1 %[[OUTPTR]], i64 4, i1 false)
+  // SPV: call void @llvm.memcpy.p0.p11.i64(ptr align 1 %[[TMP]], ptr addrspace(11) align 1 %[[OUTPTR]], i64 4, i1 false)
 
   // DXIL: %[[OUTPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
   // DXIL: %[[INPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})

>From bd00ba0c96cd1ebe3913cc8b6d3aeb08cee394f0 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 21 May 2026 12:13:21 -0700
Subject: [PATCH 14/21] code review feedback - HLSL-specific isPODType, update
 overloading binop check

---
 clang/include/clang/AST/TypeBase.h            |   5 +
 clang/include/clang/Sema/SemaHLSL.h           |   2 +
 clang/lib/AST/Type.cpp                        |  20 +++
 clang/lib/CodeGen/CGDecl.cpp                  |   8 +-
 clang/lib/Sema/SemaExpr.cpp                   |  21 +--
 clang/lib/Sema/SemaHLSL.cpp                   |  11 ++
 clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak    | 160 ++++++++++++++++++
 .../this-assignment-overload.hlsl.bak         |  58 +++++++
 8 files changed, 264 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak
 create mode 100644 clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak

diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index c64eee11fd91e..779a4eba3468d 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -1111,6 +1111,11 @@ class QualType {
   /// CXXRecordDecl::isCXX11StandardLayout, this takes DRs into account.
   bool isCXX11PODType(const ASTContext &Context) const;
 
+  /// Return true if this is a POD type according to the HLSL rules.
+  /// User-defined records in HLSL do not have default constructors, which
+  /// is a POD requirement for C++.
+  bool isHLSLPODType(const ASTContext &Context) const;
+
   /// Return true if this is a trivial type per (C++0x [basic.types]p9)
   bool isTrivialType(const ASTContext &Context) const;
 
diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index e65de5d4aa4c3..02304ba0891f8 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -138,6 +138,8 @@ class SemaHLSL : public SemaBase {
   bool CheckResourceBinOp(BinaryOperatorKind Opc, Expr *LHSExpr, Expr *RHSExpr,
                           SourceLocation Loc);
 
+  bool canHaveOverloadedBinOp(QualType Ty, BinaryOperatorKind Opc);
+
   QualType handleVectorBinOpConversion(ExprResult &LHS, ExprResult &RHS,
                                        QualType LHSType, QualType RHSType,
                                        bool IsCompAssign);
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 96a398aa21dad..d4504e6b04f8c 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2786,6 +2786,10 @@ QualType Type::getRVVEltType(const ASTContext &Ctx) const {
 }
 
 bool QualType::isPODType(const ASTContext &Context) const {
+  // HLSL has a more relaxed definition of POD than C++11.
+  if (Context.getLangOpts().HLSL)
+    return isHLSLPODType(Context);
+
   // C++11 has a more relaxed definition of POD.
   if (Context.getLangOpts().CPlusPlus11)
     return isCXX11PODType(Context);
@@ -3290,6 +3294,22 @@ bool QualType::isCXX11PODType(const ASTContext &Context) const {
   return false;
 }
 
+bool QualType::isHLSLPODType(const ASTContext &Context) const {
+  if (isCXX11PODType(Context))
+    return true;
+
+  const Type *BaseTy = getTypePtr()->getBaseElementTypeUnsafe();
+  if (const auto *RD =
+          dyn_cast_or_null<CXXRecordDecl>(BaseTy->getAsRecordDecl())) {
+    // User-defined records in HLSL do not have constructors or copy/assignment
+    // operators. They are still considered POD.
+    if (!RD->hasUserProvidedSpecialMembers() || RD->isTrivial() ||
+        RD->isStandardLayout())
+      return true;
+  }
+  return false;
+}
+
 bool Type::isNothrowT() const {
   if (const auto *RD = getAsCXXRecordDecl()) {
     IdentifierInfo *II = RD->getIdentifier();
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index feeedc94de179..7608f8cb6fc7a 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1525,14 +1525,10 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
     // isConstantInitializer produces wrong answers for structs with
     // reference or bitfield members, and a few other cases, and checking
     // for POD-ness protects us from some of these.
-    QualType BaseTy = getContext().getBaseElementType(Ty);
     if (D.getInit() && (Ty->isArrayType() || Ty->isRecordType()) &&
         (D.isConstexpr() ||
-         ((Ty.isPODType(getContext()) || BaseTy->isObjCObjectPointerType() ||
-           // If HLSL, check if it's a constant initializer anyway because
-           // POD-ness will no longer be true for user defined structs
-           // (since they do not have constructors).
-           (getLangOpts().HLSL && BaseTy->isRecordType())) &&
+         ((Ty.isPODType(getContext()) ||
+           getContext().getBaseElementType(Ty)->isObjCObjectPointerType()) &&
           D.getInit()->isConstantInitializer(getContext())))) {
 
       // If the variable's a const type, and it's neither an NRVO
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 50fcff9001f88..7d28fec1dbcc2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16125,21 +16125,12 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   }
 
   if (getLangOpts().CPlusPlus) {
-    // Otherwise, build an overloaded op if either expression is type-dependent
-    // or has an overloadable type.
-    // In HLSL, user-defined structs/classes do not have ctors or
-    // overloadable operators.
-    QualType LHSTy = LHSExpr->getType();
-    QualType RHSTy = RHSExpr->getType();
-    bool IsLHSNonOverloadableHLSLType =
-        getLangOpts().HLSL && LHSTy->isRecordType() &&
-        !LHSTy->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers();
-    bool IsRHSNonOverloadableHLSLType =
-        getLangOpts().HLSL && RHSTy->isRecordType() &&
-        !RHSTy->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers();
-    if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
-        (LHSTy->isOverloadableType() && !IsLHSNonOverloadableHLSLType) ||
-        (RHSTy->isOverloadableType() && !IsRHSNonOverloadableHLSLType))
+    if ((LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
+         LHSExpr->getType()->isOverloadableType() ||
+         RHSExpr->getType()->isOverloadableType()) &&
+        (!getLangOpts().HLSL ||
+         HLSL().canHaveOverloadedBinOp(LHSExpr->getType(), Opc) ||
+         HLSL().canHaveOverloadedBinOp(RHSExpr->getType(), Opc)))
       return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
   }
 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index ad87e22e49343..c201f24cf5049 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -5745,6 +5745,17 @@ bool SemaHLSL::CheckResourceBinOp(BinaryOperatorKind Opc, Expr *LHSExpr,
   return true;
 }
 
+// Returns false only for assignment on a record type without user-provided
+// special members; any other type/operator pair may have an overload.
+bool SemaHLSL::canHaveOverloadedBinOp(QualType LHSTy, BinaryOperatorKind Opc) {
+  CXXRecordDecl *RD = LHSTy->getAsCXXRecordDecl();
+  if (!RD)
+    return true;
+  // hasUserProvidedSpecialMembers() should be true only for HLSL built-in
+  // records like resources.
+  return RD->hasUserProvidedSpecialMembers() || Opc != BO_Assign;
+}
+
 // Walks though the global variable declaration, collects all resource binding
 // requirements and adds them to Bindings
 void SemaHLSL::collectResourceBindingsOnVarDecl(VarDecl *VD) {
diff --git a/clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak b/clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak
new file mode 100644
index 0000000000000..a17e0ae6038b1
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak
@@ -0,0 +1,160 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// CHECK: [[ConstInit_fn3_s:@.*]] = private unnamed_addr constant { <4 x i1>, [12 x i8], float } { <4 x i1> <i1 true, i1 false, i1 true, i1 false>, [12 x i8] undef, float 1.000000e+00 }, align 1
+// CHECK: [[ConstInit_fn4_Arr:@.*]] = private unnamed_addr constant [2 x <4 x i1>] [<4 x i1> splat (i1 true), <4 x i1> zeroinitializer], align 4
+// CHECK: [[ConstInit_fn6_s:@.*]] = private unnamed_addr constant { <4 x i1>, [12 x i8], float } { <4 x i1> <i1 true, i1 false, i1 true, i1 false>, [12 x i8] undef, float 1.000000e+00 }, align 1
+// CHECK: [[ConstInit_fn7_Arr:@.*]] = private unnamed_addr constant [2 x <4 x i1>] [<4 x i1> splat (i1 true), <4 x i1> zeroinitializer], align 4
+
+
+struct S {
+    bool2x2 bM;
+    float f;
+};
+
+// CHECK-LABEL: define hidden noundef i1 @_Z3fn1v(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i1, align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca [2 x <2 x i32>], align 4
+// CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[B]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[B]], align 4
+// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    ret i1 [[TMP1]]
+//
+bool fn1() {
+  bool2x2 B = {true,true,true,true};
+  return B[0][0];
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i1> @_Z3fn2b(
+// CHECK-SAME: i1 noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x i1>, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A:%.*]] = alloca [2 x <2 x i32>], align 4
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[V]] to i32
+// CHECK-NEXT:    store i32 [[STOREDV]], ptr [[V_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK-NEXT:    [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i1> poison, i1 [[LOADEDV]], i32 0
+// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x i1> [[VECINIT]], i1 true, i32 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x i1> [[VECINIT1]], i1 [[LOADEDV2]], i32 1
+// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <4 x i1> [[VECINIT3]], i1 false, i32 3
+// CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[VECINIT4]] to <4 x i32>
+// CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[A]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[A]], align 4
+// CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i1>, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    ret <4 x i1> [[TMP4]]
+//
+bool2x2 fn2(bool V) {
+  bool2x2 A = {V, true, V, false};
+  return A;
+}
+
+// CHECK-LABEL: define hidden noundef i1 @_Z3fn3v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i1, align 4
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[ConstInit_fn3_s]], i32 20, i1 false)
+// CHECK-NEXT:    [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[BM]], align 1
+// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    ret i1 [[TMP1]]
+//
+bool fn3() {
+  S s = {{true,true,false,false}, 1.0};
+  return s.bM[0][0];
+}
+
+// CHECK-LABEL: define hidden noundef i1 @_Z3fn4v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i1, align 4
+// CHECK-NEXT:    [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 %Arr, ptr align 4 [[ConstInit_fn4_Arr]], i32 8, i1 false)
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+// CHECK-NEXT:    store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    ret i1 [[TMP1]]
+//
+bool fn4() {
+  bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
+  return Arr[0][1][0];
+}
+
+// CHECK-LABEL: define hidden void @_Z3fn5v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca [2 x <2 x i32>], align 4
+// CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[M]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[M]], i32 0, i32 3
+// CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
+// CHECK-NEXT:    ret void
+//
+void fn5() {
+  bool2x2 M = {true,true,true,true};
+  M[1][1] = false;
+}
+
+// CHECK-LABEL: define hidden void @_Z3fn6v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    store i32 0, ptr [[V]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[ConstInit_fn6_s]], i32 20, i1 false)
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[V]], align 4
+// CHECK-NEXT:    [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[LOADEDV]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x i32>, ptr [[BM]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP2]], align 4
+// CHECK-NEXT:    ret void
+//
+void fn6() {
+  bool V = false;
+  S s = {{true,true,false,false}, 1.0};
+  s.bM[1][0] = V;
+}
+
+// CHECK-LABEL: define hidden void @_Z3fn7v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 [[ConstInit_fn7_Arr]], i32 8, i1 false)
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[ARRAYIDX]], i32 0, i32 1
+// CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
+// CHECK-NEXT:    ret void
+//
+void fn7() {
+  bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
+  Arr[0][1][0] = false;
+}
+
+// CHECK-LABEL: define hidden noundef <16 x i1> @_Z3fn8u11matrix_typeILm4ELm4EbE(
+// CHECK-SAME: <16 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i1>, align 4
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <16 x i1> [[M]] to <16 x i32>
+// CHECK-NEXT:    store <16 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i1>, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    ret <16 x i1> [[TMP2]]
+//
+bool4x4 fn8(bool4x4 m) {
+  return m;
+}
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak
new file mode 100644
index 0000000000000..7b08f7b96c624
--- /dev/null
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes -o - -std=hlsl202x %s | FileCheck %s
+
+struct Pair {
+  int First;
+  int Second;
+  int getFirst() {
+    Pair Another = {5, 10};
+    this = Another;
+    return this.First;
+  }
+  int getSecond() {
+    this = {0, 123};
+    return Second;
+  }
+  void operator=(Pair P) {
+    First = P.First;
+    Second = 2;
+  }
+};
+[numthreads(1, 1, 1)]
+void main() {
+  Pair Vals = {1, 2};
+  Vals.First = Vals.getFirst();
+  Vals.Second = Vals.getSecond();
+}
+
+// This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
+// CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ThisPtrAdds:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: %Another = alloca %struct.Pair, align 1
+// CHECK-NEXT: [[AggTmp:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAdds]], align 4
+// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAdds]], align 4
+// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 0
+// CHECK-NEXT: store i32 5, ptr [[First]], align 1
+// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 1
+// CHECK-NEXT: store i32 10, ptr [[Second]], align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AggTmp]], ptr align 1 %Another, i32 8, i1 false)
+// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[AggTmp]])
+// CHECK-NEXT: [[First2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
+// CHECK-NEXT: %0 = load i32, ptr [[First2]], align 1
+// CHECK-NEXT: ret i32 %0
+
+// CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
+// CHECK-NEXT:entry:
+// CHECK-NEXT: [[ThisPtrAdds:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
+// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAdds]], align 4
+// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAdds]], align 4
+// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: store i32 0, ptr [[First]], align 1
+// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: store i32 123, ptr [[Second]], align 1
+// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
+// CHECK-NEXT: [[Second2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
+// CHECK-NEXT: %0 = load i32, ptr [[Second2]], align 1
+// CHECK-NEXT: ret i32 %0

>From 91cf4b26bad70f23561335e2c43ff5d521422bb1 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Tue, 26 May 2026 16:31:43 -0700
Subject: [PATCH 15/21] Update tests after merge

---
 .../BasicFeatures/ArrayElementwiseCast.hlsl            |  1 +
 clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl    |  3 ++-
 .../BasicFeatures/StructElementwiseCast.hlsl           |  2 ++
 .../test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl  |  6 ++++++
 .../BasicFeatures/VectorElementwiseCast.hlsl           |  2 ++
 .../CodeGenHLSL/resources/cbuffer_struct_passing.hlsl  | 10 +++++-----
 clang/test/CodeGenHLSL/this-assignment-overload.hlsl   |  8 ++++----
 7 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index 287973a7f7764..b8611914efb29 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -157,6 +157,7 @@ struct Derived : BFields {
 // flatten from a derived struct with bitfields
 // CHECK-LABEL: call8
 // CHECK-NEXT: entry:
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT: [[DIndirectAddr:%.*]] = alloca ptr, align 4
 // CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 5873f70319de9..183bfb6f3d87a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -1181,7 +1181,7 @@ void case24() {
 // CHECK-SAME: ptr noundef dead_on_return [[ED:%.*]], ptr noundef dead_on_return [[UD:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
-  // CHECK-NEXT:    [[ED_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[ED_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[UD_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI1:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
 // CHECK-NEXT:    [[TI2:%.*]] = alloca [[STRUCT_TWOINTS]], align 1
@@ -1199,6 +1199,7 @@ void case25(EmptyDerived ED, UnnamedDerived UD) {
 // CHECK-LABEL: define hidden void @_Z6case267TwoInts(
 // CHECK-SAME: ptr noundef dead_on_return [[TI:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:    [[TI_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[F:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[F2:%.*]] = alloca <3 x float>, align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 3b7111ee2fd02..e02491d71cc72 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -153,6 +153,7 @@ struct Derived : BFields {
 // Derived Struct truncate to scalar
 // CHECK-LABEL: call9
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:  [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT: [[D2:%.*]] = alloca double, align 8
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
@@ -204,6 +205,7 @@ export void call10(int4 I) {
 // truncate derived struct
 // CHECK-LABEL: call11
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:  [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT: [[B:%.*]] = alloca %struct.BFields, align 1
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl
index 5c6940c4d2e4d..023c369059bcb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructPassing.hlsl
@@ -13,6 +13,7 @@ struct S : P {
 // CHECK-LABEL: define hidden void @_Z5case11S(
 // CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[SLOCAL:%.*]] = alloca [[STRUCT_S:%.*]], align 1
 // CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
@@ -27,6 +28,7 @@ void case1(S s) {
 // CHECK-LABEL: define hidden void @_Z5case21S(
 // CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[SLOCAL:%.*]] = alloca [[STRUCT_S:%.*]], align 1
 // CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S]], align 1
@@ -46,6 +48,7 @@ void useS(S s) {}
 // CHECK-LABEL: define hidden void @_Z5case31S(
 // CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1
 // CHECK-NEXT:    store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 4
@@ -61,6 +64,7 @@ void case3(S s) {
 // CHECK-LABEL: define hidden void @_Z5case41S(
 // CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[PLOCAL:%.*]] = alloca [[STRUCT_P:%.*]], align 1
 // CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1
@@ -77,6 +81,7 @@ void case4(S s) {
 // CHECK-LABEL: define hidden void @_Z5case51S(
 // CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_P:%.*]], align 1
 // CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_P]], align 1
@@ -98,6 +103,7 @@ void useP(P p) {}
 // CHECK-LABEL: define hidden void @_Z5case61S(
 // CHECK-SAME: ptr noundef dead_on_return [[S:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:    [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[AGG_TMP:%.*]] = alloca [[STRUCT_P:%.*]], align 1
 // CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 31263bca443a6..89f5ff8f16825 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -95,6 +95,7 @@ struct Derived : BFields {
 // vector flat cast from derived struct with bitfield
 // CHECK-LABEL: call6
 // CHECK-NEXT: entry:
+// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT: [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT: [[A:%.*]] = alloca <4 x i32>, align 4
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
@@ -221,6 +222,7 @@ struct BoolVecStruct {
 // vector flat cast from struct containing bool vector
 // CHECK-LABEL: call10
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:  %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
 // CHECK-NEXT:  [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[V:%.*]] = alloca <2 x i32>, align 4
 // CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
index d1eb2f740dbf2..d29ec8e9f87b9 100644
--- a/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
+++ b/clang/test/CodeGenHLSL/resources/cbuffer_struct_passing.hlsl
@@ -2,7 +2,7 @@
 // RUN:     FileCheck %s -DCONST_ADDR_SPACE=2 -DPADDING_TYPE="dx.Padding"
 
 // RUN: %clang_cc1 -triple spirv-pc-vulkan1.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | \
-// RUN:     FileCheck %s -DCONST_ADDR_SPACE=12 -DPADDING_TYPE="spirv.Padding" --check-prefixes=CHECK,SPIRV
+// RUN:     FileCheck %s -DCONST_ADDR_SPACE=12 -DPADDING_TYPE="spirv.Padding"
 
 struct P {
   float3 a;
@@ -33,7 +33,7 @@ cbuffer CB {
 
 // CHECK-LABEL: case1
 // CHECK-NEXT: entry:
-// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+// CHECK-NEXT: call token @llvm.experimental.convergence.entry()
 //
   // Copy S field by field into local variable in default address space.
 //
@@ -57,7 +57,7 @@ void case1() {
 
 // CHECK-LABEL: case2
 // CHECK-NEXT: entry:
-// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+// CHECK-NEXT: call token @llvm.experimental.convergence.entry()
 //
 // Copy S field by field into a temporary variable in default address space.
 //
@@ -130,7 +130,7 @@ void case2() {
 
 // CHECK-LABEL: case3
 // CHECK-NEXT: entry:
-// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+// CHECK-NEXT: call token @llvm.experimental.convergence.entry()
 void case3() {
 
 // CHECK-NEXT: [[LocalT:%.*]] = alloca %struct.T, align 1
@@ -191,7 +191,7 @@ void case3() {
 
 // CHECK-LABEL: case4
 // CHECK-NEXT: entry:
-// SPIRV-NEXT: call token @llvm.experimental.convergence.entry()
+// CHECK-NEXT: call token @llvm.experimental.convergence.entry()
 void case4() {
 
 // CHECK-NEXT: [[LocalP1:%.*]] = alloca %struct.P, align 1
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index 34c57ed76a67e..8ebc5f696e33e 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -37,8 +37,8 @@ void main() {
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[Another]], i32 8, i1 false)
 // CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
 // CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
-// CHECK-NEXT: %0 = load i32, ptr [[First]], align 1
-// CHECK-NEXT: ret i32 %0
+// CHECK-NEXT: %[[LOAD:.*]] = load i32, ptr [[First]], align 1
+// CHECK-NEXT: ret i32 %[[LOAD]]
 
 // CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
@@ -53,5 +53,5 @@ void main() {
 // CHECK-NEXT: store i32 123, ptr [[Second]], align 1
 // CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
 // CHECK-NEXT: [[Second2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
-// CHECK-NEXT: %0 = load i32, ptr [[Second2]], align 1
-// CHECK-NEXT: ret i32 %0
+// CHECK-NEXT: %[[LOAD:.*]] = load i32, ptr [[Second2]], align 1
+// CHECK-NEXT: ret i32 %[[LOAD]]

>From 10ad30d343ae340dbc61a4f2f8ea649f8d71d7a0 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Wed, 3 Jun 2026 12:52:12 -0700
Subject: [PATCH 16/21] remove stray files

---
 clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak    | 160 ------------------
 .../this-assignment-overload.hlsl.bak         |  58 -------
 2 files changed, 218 deletions(-)
 delete mode 100644 clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak
 delete mode 100644 clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak

diff --git a/clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak b/clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak
deleted file mode 100644
index a17e0ae6038b1..0000000000000
--- a/clang/test/CodeGenHLSL/BoolMatrix.hlsl.bak
+++ /dev/null
@@ -1,160 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
-
-// CHECK: [[ConstInit_fn3_s:@.*]] = private unnamed_addr constant { <4 x i1>, [12 x i8], float } { <4 x i1> <i1 true, i1 false, i1 true, i1 false>, [12 x i8] undef, float 1.000000e+00 }, align 1
-// CHECK: [[ConstInit_fn4_Arr:@.*]] = private unnamed_addr constant [2 x <4 x i1>] [<4 x i1> splat (i1 true), <4 x i1> zeroinitializer], align 4
-// CHECK: [[ConstInit_fn6_s:@.*]] = private unnamed_addr constant { <4 x i1>, [12 x i8], float } { <4 x i1> <i1 true, i1 false, i1 true, i1 false>, [12 x i8] undef, float 1.000000e+00 }, align 1
-// CHECK: [[ConstInit_fn7_Arr:@.*]] = private unnamed_addr constant [2 x <4 x i1>] [<4 x i1> splat (i1 true), <4 x i1> zeroinitializer], align 4
-
-
-struct S {
-    bool2x2 bM;
-    float f;
-};
-
-// CHECK-LABEL: define hidden noundef i1 @_Z3fn1v(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i1, align 4
-// CHECK-NEXT:    [[B:%.*]] = alloca [2 x <2 x i32>], align 4
-// CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[B]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[B]], align 4
-// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
-// CHECK-NEXT:    store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret i1 [[TMP1]]
-//
-bool fn1() {
-  bool2x2 B = {true,true,true,true};
-  return B[0][0];
-}
-
-// CHECK-LABEL: define hidden noundef <4 x i1> @_Z3fn2b(
-// CHECK-SAME: i1 noundef [[V:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x i1>, align 4
-// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[A:%.*]] = alloca [2 x <2 x i32>], align 4
-// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[V]] to i32
-// CHECK-NEXT:    store i32 [[STOREDV]], ptr [[V_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
-// CHECK-NEXT:    [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i1> poison, i1 [[LOADEDV]], i32 0
-// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x i1> [[VECINIT]], i1 true, i32 2
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V_ADDR]], align 4
-// CHECK-NEXT:    [[LOADEDV2:%.*]] = icmp ne i32 [[TMP1]], 0
-// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x i1> [[VECINIT1]], i1 [[LOADEDV2]], i32 1
-// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <4 x i1> [[VECINIT3]], i1 false, i32 3
-// CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[VECINIT4]] to <4 x i32>
-// CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[A]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[A]], align 4
-// CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i1>, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret <4 x i1> [[TMP4]]
-//
-bool2x2 fn2(bool V) {
-  bool2x2 A = {V, true, V, false};
-  return A;
-}
-
-// CHECK-LABEL: define hidden noundef i1 @_Z3fn3v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i1, align 4
-// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[ConstInit_fn3_s]], i32 20, i1 false)
-// CHECK-NEXT:    [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[BM]], align 1
-// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
-// CHECK-NEXT:    store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret i1 [[TMP1]]
-//
-bool fn3() {
-  S s = {{true,true,false,false}, 1.0};
-  return s.bM[0][0];
-}
-
-// CHECK-LABEL: define hidden noundef i1 @_Z3fn4v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i1, align 4
-// CHECK-NEXT:    [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 %Arr, ptr align 4 [[ConstInit_fn4_Arr]], i32 8, i1 false)
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
-// CHECK-NEXT:    store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret i1 [[TMP1]]
-//
-bool fn4() {
-  bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
-  return Arr[0][1][0];
-}
-
-// CHECK-LABEL: define hidden void @_Z3fn5v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[M:%.*]] = alloca [2 x <2 x i32>], align 4
-// CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[M]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[M]], i32 0, i32 3
-// CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
-// CHECK-NEXT:    ret void
-//
-void fn5() {
-  bool2x2 M = {true,true,true,true};
-  M[1][1] = false;
-}
-
-// CHECK-LABEL: define hidden void @_Z3fn6v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
-// CHECK-NEXT:    store i32 0, ptr [[V]], align 4
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[ConstInit_fn6_s]], i32 20, i1 false)
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[V]], align 4
-// CHECK-NEXT:    [[LOADEDV:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK-NEXT:    [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[LOADEDV]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x i32>, ptr [[BM]], i32 0, i32 1
-// CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP2]], align 4
-// CHECK-NEXT:    ret void
-//
-void fn6() {
-  bool V = false;
-  S s = {{true,true,false,false}, 1.0};
-  s.bM[1][0] = V;
-}
-
-// CHECK-LABEL: define hidden void @_Z3fn7v(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 [[ConstInit_fn7_Arr]], i32 8, i1 false)
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[ARRAYIDX]], i32 0, i32 1
-// CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
-// CHECK-NEXT:    ret void
-//
-void fn7() {
-  bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
-  Arr[0][1][0] = false;
-}
-
-// CHECK-LABEL: define hidden noundef <16 x i1> @_Z3fn8u11matrix_typeILm4ELm4EbE(
-// CHECK-SAME: <16 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i1>, align 4
-// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = zext <16 x i1> [[M]] to <16 x i32>
-// CHECK-NEXT:    store <16 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
-// CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i1>, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret <16 x i1> [[TMP2]]
-//
-bool4x4 fn8(bool4x4 m) {
-  return m;
-}
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak
deleted file mode 100644
index 7b08f7b96c624..0000000000000
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl.bak
+++ /dev/null
@@ -1,58 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes -o - -std=hlsl202x %s | FileCheck %s
-
-struct Pair {
-  int First;
-  int Second;
-  int getFirst() {
-    Pair Another = {5, 10};
-    this = Another;
-    return this.First;
-  }
-  int getSecond() {
-    this = {0, 123};
-    return Second;
-  }
-  void operator=(Pair P) {
-    First = P.First;
-    Second = 2;
-  }
-};
-[numthreads(1, 1, 1)]
-void main() {
-  Pair Vals = {1, 2};
-  Vals.First = Vals.getFirst();
-  Vals.Second = Vals.getSecond();
-}
-
-// This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
-// CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ThisPtrAdds:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: %Another = alloca %struct.Pair, align 1
-// CHECK-NEXT: [[AggTmp:%.*]] = alloca %struct.Pair, align 1
-// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAdds]], align 4
-// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAdds]], align 4
-// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 0
-// CHECK-NEXT: store i32 5, ptr [[First]], align 1
-// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 1
-// CHECK-NEXT: store i32 10, ptr [[Second]], align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AggTmp]], ptr align 1 %Another, i32 8, i1 false)
-// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[AggTmp]])
-// CHECK-NEXT: [[First2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
-// CHECK-NEXT: %0 = load i32, ptr [[First2]], align 1
-// CHECK-NEXT: ret i32 %0
-
-// CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
-// CHECK-NEXT:entry:
-// CHECK-NEXT: [[ThisPtrAdds:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
-// CHECK-NEXT: store ptr %this, ptr [[ThisPtrAdds]], align 4
-// CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAdds]], align 4
-// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: store i32 0, ptr [[First]], align 1
-// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 1
-// CHECK-NEXT: store i32 123, ptr [[Second]], align 1
-// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
-// CHECK-NEXT: [[Second2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
-// CHECK-NEXT: %0 = load i32, ptr [[Second2]], align 1
-// CHECK-NEXT: ret i32 %0

>From e905a3e04d18b72c12a47dbdfbe25acb827ec50c Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 4 Jun 2026 09:54:39 -0700
Subject: [PATCH 17/21] Add isHLSLBuiltinRecord bit to CXXRecordDecl definition
 data and use it instead of hasUserProvidedSpecialMembers()

---
 .../clang/AST/CXXRecordDeclDefinitionBits.def |  4 +
 clang/include/clang/AST/DeclCXX.h             | 80 ++++++-------------
 clang/lib/AST/Type.cpp                        |  3 +-
 clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp |  1 +
 clang/lib/Sema/SemaHLSL.cpp                   |  6 +-
 clang/lib/Sema/SemaInit.cpp                   |  4 +-
 clang/lib/Sema/SemaOverload.cpp               |  2 +-
 clang/lib/Sema/SemaTemplateInstantiate.cpp    |  2 +
 .../CodeGenHLSL/this-assignment-overload.hlsl | 10 +--
 9 files changed, 44 insertions(+), 68 deletions(-)

diff --git a/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def b/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
index 7e6e2147a448d..97e61aaec7d51 100644
--- a/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
+++ b/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
@@ -253,6 +253,10 @@ FIELD(IsAnyDestructorNoReturn, 1, NO_MERGE)
 /// type that is intangible). HLSL only.
 FIELD(IsHLSLIntangible, 1, NO_MERGE)
 
+/// Whether the record type is a built-in HLSL record which must be handled
+/// differently by the compiler than user-defined records. HLSL only.
+FIELD(IsHLSLBuiltinRecord, 1, NO_MERGE)
+
 /// Whether the pointer fields in this class should have pointer field
 /// protection (PFP) by default, either because of an attribute, the
 /// -fexperimental-pointer-field-protection-abi compiler flag or inheritance
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 36cd8f519ea34..868a9bb52215c 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -759,38 +759,20 @@ class CXXRecordDecl : public RecordDecl {
            needsImplicitDefaultConstructor();
   }
 
-  // Used by HLSL to determine if a record is a built-in implicit HLSL
-  // struct/class or a user-defined one. User-defined HLSL records cannot
-  // have ctors, dtors, or overloaded operators, while implicit built-in
-  // HLSL records such as resource classes can. It would be nice to use the
-  // isImplicit() methods to determine that, but this flag is not propagated
-  // to template-instantiated classes.
-  //
-  /// Determines whether this class has any user provided special members.
-  bool hasUserProvidedSpecialMembers() const {
-    return data().UserDeclaredSpecialMembers & SMF_All ||
-           data().UserDeclaredConstructor ||
-           data().UserProvidedDefaultConstructor;
-  }
-
   /// Determine if we need to declare a default constructor for
   /// this class.
   ///
   /// This value is used for lazy creation of default constructors.
   bool needsImplicitDefaultConstructor() const {
-    // In HLSL, only built-in records like resource classes can have
-    // constructors and overloadable operators.
-    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
-      return false;
-
-    return (!data().UserDeclaredConstructor &&
-            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
-            (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
-           // FIXME: Proposed fix to core wording issue: if a class inherits
-           // a default constructor and doesn't explicitly declare one, one
-           // is declared implicitly.
-           (data().HasInheritedDefaultConstructor &&
-            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor));
+    return ((!data().UserDeclaredConstructor &&
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
+             (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
+            // FIXME: Proposed fix to core wording issue: if a class inherits
+            // a default constructor and doesn't explicitly declare one, one
+            // is declared implicitly.
+            (data().HasInheritedDefaultConstructor &&
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
+           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
   }
 
   /// Determine whether this class has any user-declared constructors.
@@ -816,12 +798,8 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// constructor to be lazily declared.
   bool needsImplicitCopyConstructor() const {
-    // In HLSL, only built-in records like resource classes can have
-    // constructors and overloadable operators.
-    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
-      return false;
-
-    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor);
+    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor) &&
+           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -914,16 +892,11 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class should get an implicit move
   /// constructor or if any existing special member function inhibits this.
   bool needsImplicitMoveConstructor() const {
-    // In HLSL, only built-in records like resource classes can have
-    // constructors and overloadable operators.
-    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
-      return false;
-
     return !(data().DeclaredSpecialMembers & SMF_MoveConstructor) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveAssignment() &&
-           !hasUserDeclaredDestructor();
+           !hasUserDeclaredMoveAssignment() && !hasUserDeclaredDestructor() &&
+           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
   }
 
   /// Determine whether we need to eagerly declare a defaulted move
@@ -952,12 +925,8 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// assignment operator to be lazily declared.
   bool needsImplicitCopyAssignment() const {
-    // In HLSL, only built-in records like resource classes can have
-    // constructors and overloadable operators.
-    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
-      return false;
-
-    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment);
+    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment) &&
+           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -1015,17 +984,12 @@ class CXXRecordDecl : public RecordDecl {
   /// assignment operator or if any existing special member function inhibits
   /// this.
   bool needsImplicitMoveAssignment() const {
-    // In HLSL, only built-in records like resource classes can have
-    // constructors and overloadable operators.
-    if (getLangOpts().HLSL && !hasUserProvidedSpecialMembers())
-      return false;
-
     return !(data().DeclaredSpecialMembers & SMF_MoveAssignment) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveConstructor() &&
-           !hasUserDeclaredDestructor() &&
-           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable());
+           !hasUserDeclaredMoveConstructor() && !hasUserDeclaredDestructor() &&
+           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable()) &&
+           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
   }
 
   /// Determine whether we need to eagerly declare a move assignment
@@ -1594,6 +1558,14 @@ class CXXRecordDecl : public RecordDecl {
   /// a field or in base class.
   bool isHLSLIntangible() const { return data().IsHLSLIntangible; }
 
+  /// Returns true if the class is a built-in HLSL record.
+  bool isHLSLBuiltinRecord() const { return data().IsHLSLBuiltinRecord; }
+
+  /// Sets the flag that the class is a built-in HLSL record.
+  void setIsHLSLBuiltinRecord(bool Value) {
+    data().IsHLSLBuiltinRecord = Value;
+  }
+
   /// If the class is a local class [class.local], returns
   /// the enclosing function declaration.
   const FunctionDecl *isLocalClass() const {
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index d4504e6b04f8c..43b8cdc686cb9 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3303,8 +3303,7 @@ bool QualType::isHLSLPODType(const ASTContext &Context) const {
           dyn_cast_or_null<CXXRecordDecl>(BaseTy->getAsRecordDecl())) {
     // User-defined records in HLSL do not have constructors or copy/assignment
     // operators. They are still considered POD.
-    if (!RD->hasUserProvidedSpecialMembers() || RD->isTrivial() ||
-        RD->isStandardLayout())
+    if (!RD->isHLSLBuiltinRecord() || RD->isTrivial() || RD->isStandardLayout())
       return true;
   }
   return false;
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index de170c86400d2..40e4f4024d6da 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -2167,6 +2167,7 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::completeDefinition() {
          "Definition must be started before completing it.");
 
   Record->completeDefinition();
+  Record->setIsHLSLBuiltinRecord(true);
   return *this;
 }
 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index c201f24cf5049..24409088b62d8 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -5650,7 +5650,7 @@ bool SemaHLSL::ActOnUninitializedVarDecl(VarDecl *VD) {
   while (Ty->isArrayType())
     Ty = Ty->getArrayElementTypeNoTypeQual()->getUnqualifiedDesugaredType();
   if (CXXRecordDecl *RD = Ty->getAsCXXRecordDecl())
-    return !RD->hasUserProvidedSpecialMembers();
+    return !RD->isHLSLBuiltinRecord();
 
   return false;
 }
@@ -5751,9 +5751,7 @@ bool SemaHLSL::canHaveOverloadedBinOp(QualType LHSTy, BinaryOperatorKind Opc) {
   CXXRecordDecl *RD = LHSTy->getAsCXXRecordDecl();
   if (!RD)
     return true;
-  // hasUserProvidedSpecialMembers() should be true only for HLSL built-in
-  // records like resources.
-  return RD->hasUserProvidedSpecialMembers() || Opc != BO_Assign;
+  return RD->isHLSLBuiltinRecord() || Opc != BO_Assign;
 }
 
 // Walks though the global variable declaration, collects all resource binding
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 745f1aef40ea9..8f685feac4beb 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -6937,7 +6937,7 @@ void InitializationSequence::InitializeFrom(Sema &S,
   //        constructors or conversion functions)
   if (DestType->isRecordType() &&
       (!S.getLangOpts().HLSL ||
-       DestType->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers())) {
+       DestType->getAsCXXRecordDecl()->isHLSLBuiltinRecord())) {
     //     - If the initialization is direct-initialization, or if it is
     //       copy-initialization where the cv-unqualified version of the
     //       source type is the same class as, or a derived class of, the
@@ -7026,7 +7026,7 @@ void InitializationSequence::InitializeFrom(Sema &S,
   //      constructors or conversion functions).
   if (!SourceType.isNull() && SourceType->isRecordType() &&
       (!S.getLangOpts().HLSL ||
-       SourceType->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers())) {
+       SourceType->getAsCXXRecordDecl()->isHLSLBuiltinRecord())) {
     assert(Initializer && "Initializer must be non-null");
     // For a conversion to _Atomic(T) from either T or a class type derived
     // from T, initialize the T object then convert to _Atomic type.
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 70ad3adc636e3..b3e0d5a48a521 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -15537,7 +15537,7 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
   if (Opc == BO_Assign &&
       (!LHSTy->isOverloadableType() ||
        (getLangOpts().HLSL && LHSTy->isRecordType() &&
-        !LHSTy->getAsCXXRecordDecl()->hasUserProvidedSpecialMembers())))
+        !LHSTy->getAsCXXRecordDecl()->isHLSLBuiltinRecord())))
     return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
 
   // Build the overload set.
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index f168c99d1ac1a..758abc2c2bac1 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -3727,6 +3727,8 @@ bool Sema::InstantiateClassImpl(
     }
   }
 
+  Instantiation->setIsHLSLBuiltinRecord(Pattern->isHLSLBuiltinRecord());
+
   // Exit the scope of this instantiation.
   SavedContext.pop();
 
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index 8ebc5f696e33e..bf5a125d345d1 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -34,8 +34,8 @@ void main() {
 // CHECK-NEXT: store ptr %this, ptr [[ThisPtrAdds]], align 4
 // CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAdds]], align 4
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Another]], ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[Another]], i32 8, i1 false)
-// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[ThisPtr]], ptr align 1 [[Another]], i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[ThisPtr]], i32 8, i1 false)
 // CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
 // CHECK-NEXT: %[[LOAD:.*]] = load i32, ptr [[First]], align 1
 // CHECK-NEXT: ret i32 %[[LOAD]]
@@ -47,11 +47,11 @@ void main() {
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
 // CHECK-NEXT: store ptr %this, ptr [[ThisPtrAdds]], align 4
 // CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAdds]], align 4
-// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[First:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
 // CHECK-NEXT: store i32 0, ptr [[First]], align 1
-// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[Tmp]], i32 0, i32 1
+// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
 // CHECK-NEXT: store i32 123, ptr [[Second]], align 1
-// CHECK-NEXT: call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) [[ThisPtr]], ptr noundef byval(%struct.Pair) align 1 [[Tmp]])
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[ThisPtr]], i32 8, i1 false)
 // CHECK-NEXT: [[Second2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
 // CHECK-NEXT: %[[LOAD:.*]] = load i32, ptr [[Second2]], align 1
 // CHECK-NEXT: ret i32 %[[LOAD]]

>From fa04e38add72570dc8f33e34452ef0435920bb64 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 4 Jun 2026 14:53:06 -0700
Subject: [PATCH 18/21] code review feedback

---
 clang/include/clang/AST/TypeBase.h            | 11 +++----
 clang/lib/AST/Type.cpp                        | 31 ++++++++-----------
 clang/lib/Sema/SemaExpr.cpp                   | 15 +++++----
 clang/lib/Sema/SemaOverload.cpp               |  5 ++-
 .../BasicFeatures/ArrayElementwiseCast.hlsl   | 10 ++----
 5 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index 779a4eba3468d..3a9918b33082a 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -1111,11 +1111,6 @@ class QualType {
   /// CXXRecordDecl::isCXX11StandardLayout, this takes DRs into account.
   bool isCXX11PODType(const ASTContext &Context) const;
 
-  /// Return true if this is a POD type according to the HLSL rules.
-  /// User-defined records in HLSL do not have default constructors, which
-  /// is a POD requirement for C++.
-  bool isHLSLPODType(const ASTContext &Context) const;
-
   /// Return true if this is a trivial type per (C++0x [basic.types]p9)
   bool isTrivialType(const ASTContext &Context) const;
 
@@ -2803,8 +2798,10 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
   bool isHLSLInlineSpirvType() const;
   bool isHLSLResourceRecord() const;
   bool isHLSLResourceRecordArray() const;
-  bool isHLSLIntangibleType()
-      const; // Any HLSL intangible type (builtin, array, class)
+  // Any HLSL intangible type (builtin, array, class)
+  bool isHLSLIntangibleType() const;
+  // User-defined HLSL records or arrays of such records
+  bool isHLSLStandardRecordOrArrayOf() const;
 
   /// Determines if this type, which must satisfy
   /// isObjCLifetimeType(), is implicitly __unsafe_unretained rather
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 43b8cdc686cb9..cbead120cbffc 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2786,9 +2786,9 @@ QualType Type::getRVVEltType(const ASTContext &Ctx) const {
 }
 
 bool QualType::isPODType(const ASTContext &Context) const {
-  // HLSL has a more relaxed definition of POD than C++11.
-  if (Context.getLangOpts().HLSL)
-    return isHLSLPODType(Context);
+  if (Context.getLangOpts().HLSL &&
+      getTypePtr()->isHLSLStandardRecordOrArrayOf())
+    return true;
 
   // C++11 has a more relaxed definition of POD.
   if (Context.getLangOpts().CPlusPlus11)
@@ -3294,21 +3294,6 @@ bool QualType::isCXX11PODType(const ASTContext &Context) const {
   return false;
 }
 
-bool QualType::isHLSLPODType(const ASTContext &Context) const {
-  if (isCXX11PODType(Context))
-    return true;
-
-  const Type *BaseTy = getTypePtr()->getBaseElementTypeUnsafe();
-  if (const auto *RD =
-          dyn_cast_or_null<CXXRecordDecl>(BaseTy->getAsRecordDecl())) {
-    // User-defined records in HLSL do not have constructors or copy/assignment
-    // operators. They are still considered POD.
-    if (!RD->isHLSLBuiltinRecord() || RD->isTrivial() || RD->isStandardLayout())
-      return true;
-  }
-  return false;
-}
-
 bool Type::isNothrowT() const {
   if (const auto *RD = getAsCXXRecordDecl()) {
     IdentifierInfo *II = RD->getIdentifier();
@@ -5548,6 +5533,16 @@ bool Type::isHLSLIntangibleType() const {
   return RD->isHLSLIntangible();
 }
 
+bool Type::isHLSLStandardRecordOrArrayOf() const {
+  const Type *BaseTy = getBaseElementTypeUnsafe();
+  if (const auto *RD =
+          dyn_cast_or_null<CXXRecordDecl>(BaseTy->getAsRecordDecl())) {
+    if (!RD->isHLSLBuiltinRecord())
+      return true;
+  }
+  return false;
+}
+
 QualType::DestructionKind QualType::isDestructedTypeImpl(QualType type) {
   switch (type.getObjCLifetime()) {
   case Qualifiers::OCL_None:
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index fbd4d275f5f7f..aa32332dd577d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16117,12 +16117,15 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   }
 
   if (getLangOpts().CPlusPlus) {
-    if ((LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
-         LHSExpr->getType()->isOverloadableType() ||
-         RHSExpr->getType()->isOverloadableType()) &&
-        (!getLangOpts().HLSL ||
-         HLSL().canHaveOverloadedBinOp(LHSExpr->getType(), Opc) ||
-         HLSL().canHaveOverloadedBinOp(RHSExpr->getType(), Opc)))
+    bool CanOverloadBinOp =
+        !getLangOpts().HLSL ||
+        HLSL().canHaveOverloadedBinOp(LHSExpr->getType(), Opc) ||
+        HLSL().canHaveOverloadedBinOp(RHSExpr->getType(), Opc);
+    bool TypeDependent =
+        LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent();
+    bool Overloadable = LHSExpr->getType()->isOverloadableType() ||
+                        RHSExpr->getType()->isOverloadableType();
+    if (CanOverloadBinOp && (TypeDependent || Overloadable))
       return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
   }
 
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index b3e0d5a48a521..543c97981bd6a 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1831,9 +1831,6 @@ TryImplicitConversion(Sema &S, Expr *From, QualType ToType,
   //   given Conversion rank, in spite of the fact that a copy/move
   //   constructor (i.e., a user-defined conversion function) is
   //   called for those cases.
-  // HLSL:
-  //   A conversion of an expression of class type to the same class
-  //   type needs implicit LvaluetoRvalue conversion.
   QualType FromType = From->getType();
   if (ToType->isRecordType() &&
       (S.Context.hasSameUnqualifiedType(FromType, ToType) ||
@@ -1849,6 +1846,8 @@ TryImplicitConversion(Sema &S, Expr *From, QualType ToType,
     // appropriate constructor to copy the returned object, if needed.
     ICS.Standard.CopyConstructor = nullptr;
 
+    // In HLSL, a conversion of an expression of class type to the same class
+    // type needs an implicit lvalue-to-rvalue conversion.
     if (S.getLangOpts().HLSL)
       ICS.Standard.First = ICK_Lvalue_To_Rvalue;
 
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index b8611914efb29..f684eecf20ff1 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -156,13 +156,9 @@ struct Derived : BFields {
 
 // flatten from a derived struct with bitfields
 // CHECK-LABEL: call8
-// CHECK-NEXT: entry:
-// CHECK-NEXT: %[[#C_ENTRY:]] = call token @llvm.experimental.convergence.entry()
-// CHECK-NEXT: [[DIndirectAddr:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: store ptr %D, ptr [[DIndirectAddr]], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK: [[A:%.*]] = alloca [4 x i32], align 4
+// CHECK: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 0
 // CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 1
 // CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 2

>From 38926b979fc7381744cbc2726a3ee64ebdfa9951 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Mon, 8 Jun 2026 17:57:43 -0700
Subject: [PATCH 19/21] after-merge fixes and function rename

---
 clang/include/clang/AST/TypeBase.h | 4 ++--
 clang/lib/AST/Type.cpp             | 6 +++---
 clang/lib/CodeGen/CGExprAgg.cpp    | 6 +++++-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index 3a9918b33082a..65a35342fb09b 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -2800,8 +2800,8 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
   bool isHLSLResourceRecordArray() const;
   // Any HLSL intangible type (builtin, array, class)
   bool isHLSLIntangibleType() const;
-  // User-defined HLSL records or arrays of such records
-  bool isHLSLStandardRecordOrArrayOf() const;
+  // Standard-layout user-defined HLSL records, or arrays of such records
+  bool isHLSLStandardLayoutRecordOrArrayOf() const;
 
   /// Determines if this type, which must satisfy
   /// isObjCLifetimeType(), is implicitly __unsafe_unretained rather
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index cbead120cbffc..2f6d8c1aa59ce 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2787,7 +2787,7 @@ QualType Type::getRVVEltType(const ASTContext &Ctx) const {
 
 bool QualType::isPODType(const ASTContext &Context) const {
   if (Context.getLangOpts().HLSL &&
-      getTypePtr()->isHLSLStandardRecordOrArrayOf())
+      getTypePtr()->isHLSLStandardLayoutRecordOrArrayOf())
     return true;
 
   // C++11 has a more relaxed definition of POD.
@@ -5533,11 +5533,11 @@ bool Type::isHLSLIntangibleType() const {
   return RD->isHLSLIntangible();
 }
 
-bool Type::isHLSLStandardRecordOrArrayOf() const {
+bool Type::isHLSLStandardLayoutRecordOrArrayOf() const {
   const Type *BaseTy = getBaseElementTypeUnsafe();
   if (const auto *RD =
           dyn_cast_or_null<CXXRecordDecl>(BaseTy->getAsRecordDecl())) {
-    if (!RD->isHLSLBuiltinRecord())
+    if (!RD->isHLSLBuiltinRecord() && RD->isStandardLayout())
       return true;
   }
   return false;
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index b70aa2bf33b9b..8540603c28e9a 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -881,7 +881,11 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
     // Create a temporary for the derived record, switch it out with the current
     // Dest slot, and emit the derived value.
     QualType DerivedTy = E->getSubExpr()->getType();
-    AggValueSlot DerivedTmpSlot = CGF.CreateAggTemp(DerivedTy, "tmp");
+    RawAddress DerivedAddr = CGF.CreateMemTempWithoutCast(DerivedTy);
+    AggValueSlot DerivedTmpSlot = AggValueSlot::forAddr(
+        DerivedAddr, DerivedTy.getQualifiers(), AggValueSlot::IsNotDestructed,
+        AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased,
+        AggValueSlot::DoesNotOverlap);
 
     AggValueSlot DestBaseSlot = Dest;
     Dest = DerivedTmpSlot;

>From 280781b7517fb25e34e27d57adad588df6b927c7 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 11 Jun 2026 21:36:15 -0700
Subject: [PATCH 20/21] refactor condition to keep original formatting

---
 clang/include/clang/AST/DeclCXX.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 868a9bb52215c..db05fa38818be 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -893,10 +893,11 @@ class CXXRecordDecl : public RecordDecl {
   /// constructor or if any existing special member function inhibits this.
   bool needsImplicitMoveConstructor() const {
     return !(data().DeclaredSpecialMembers & SMF_MoveConstructor) &&
+           (!getLangOpts().HLSL || isHLSLBuiltinRecord()) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveAssignment() && !hasUserDeclaredDestructor() &&
-           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
+           !hasUserDeclaredMoveAssignment() &&
+           !hasUserDeclaredDestructor();
   }
 
   /// Determine whether we need to eagerly declare a defaulted move

>From 99dc19dd77f673384e2452e9390ffaa3c7b5bc85 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 11 Jun 2026 22:12:51 -0700
Subject: [PATCH 21/21] two more condition refactorings to keep original
 formatting and changes to a minimum + comment update

---
 clang/include/clang/AST/DeclCXX.h | 27 ++++++++++++++-------------
 clang/lib/Sema/SemaOverload.cpp   |  2 +-
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index db05fa38818be..28d171253dc03 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -763,16 +763,16 @@ class CXXRecordDecl : public RecordDecl {
   /// this class.
   ///
   /// This value is used for lazy creation of default constructors.
-  bool needsImplicitDefaultConstructor() const {
-    return ((!data().UserDeclaredConstructor &&
-             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
-             (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
-            // FIXME: Proposed fix to core wording issue: if a class inherits
-            // a default constructor and doesn't explicitly declare one, one
-            // is declared implicitly.
-            (data().HasInheritedDefaultConstructor &&
-             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
-           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
+  bool needsImplicitDefaultConstructor() const {
+    return (!getLangOpts().HLSL || isHLSLBuiltinRecord()) &&
+           ((!data().UserDeclaredConstructor &&
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
+             (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
+            // FIXME: Proposed fix to core wording issue: if a class inherits
+            // a default constructor and doesn't explicitly declare one, one
+            // is declared implicitly.
+            (data().HasInheritedDefaultConstructor &&
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor)));
   }
 
   /// Determine whether this class has any user-declared constructors.
@@ -986,11 +986,12 @@ class CXXRecordDecl : public RecordDecl {
   /// this.
   bool needsImplicitMoveAssignment() const {
     return !(data().DeclaredSpecialMembers & SMF_MoveAssignment) &&
+           (!getLangOpts().HLSL || isHLSLBuiltinRecord()) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveConstructor() && !hasUserDeclaredDestructor() &&
-           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable()) &&
-           (!getLangOpts().HLSL || isHLSLBuiltinRecord());
+           !hasUserDeclaredMoveConstructor() &&
+           !hasUserDeclaredDestructor() &&
+           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable());
   }
 
   /// Determine whether we need to eagerly declare a move assignment
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 4ee935e7e6443..1960a917f52cb 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -15531,7 +15531,7 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
   // problems. So we do it this way, which pretty much follows what GCC does.
   // Note that we go the traditional code path for compound assignment forms.
   // In HLSL, user-defined structs/classes do not have constructors or
-  // overloadable operators, so we can take this shortcut too.
+  // overloadable assignment operators, so we can take this shortcut too.
   const Type *LHSTy = Args[0]->getType().getTypePtr();
   if (Opc == BO_Assign &&
       (!LHSTy->isOverloadableType() ||



More information about the cfe-commits mailing list