[clang] [HLSL] Remove support for constructors for user defined structs (PR #190089)

Sarah Spall via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 15 09:07:49 PDT 2026


https://github.com/spall updated https://github.com/llvm/llvm-project/pull/190089

>From cdf67547338b841e4eaa051c2b469bb02b187686 Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Thu, 5 Mar 2026 12:48:54 -0800
Subject: [PATCH 1/9] disallow constructors for user defined types and do list
 initialization instead. update some tests

---
 clang/include/clang/AST/DeclCXX.h             | 27 ++++--
 .../clang/Basic/DiagnosticSemaKinds.td        |  1 +
 clang/lib/CodeGen/CGDecl.cpp                  |  5 +-
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  6 +-
 clang/lib/Sema/SemaCast.cpp                   |  4 +-
 clang/lib/Sema/SemaDecl.cpp                   |  8 ++
 clang/lib/Sema/SemaDeclCXX.cpp                | 12 +++
 clang/lib/Sema/SemaExpr.cpp                   | 32 ++++++-
 clang/lib/Sema/SemaExprCXX.cpp                |  5 +
 clang/lib/Sema/SemaHLSL.cpp                   |  4 +-
 clang/lib/Sema/SemaInit.cpp                   | 12 ++-
 clang/lib/Sema/SemaLookup.cpp                 |  2 +
 clang/lib/Sema/SemaOverload.cpp               | 23 ++++-
 clang/test/AST/HLSL/cbuffer.hlsl              |  4 +-
 clang/test/CodeGenHLSL/GlobalDestructors.hlsl | 94 -------------------
 .../CodeGenHLSL/convergence/global_array.hlsl | 22 -----
 .../groupsharedArgs/StructTest.hlsl           | 64 +++++++++++++
 .../test/CodeGenHLSL/inline-constructors.hlsl | 76 ---------------
 clang/test/CodeGenHLSL/this-assignment.hlsl   |  5 +-
 .../Language/AggregateSplatCast-errors.hlsl   |  6 +-
 .../Language/ElementwiseCast-errors.hlsl      |  6 +-
 clang/test/SemaHLSL/Language/InitLists.hlsl   |  8 +-
 .../Types/AggregateSplatConstantExpr.hlsl     | 30 ------
 .../Types/ElementwiseCastConstantExpr.hlsl    |  9 --
 .../SemaHLSL/Types/InitListConstantExpr.hlsl  | 43 +++++++++
 clang/test/SemaHLSL/prohibit_pointer.hlsl     |  1 +
 26 files changed, 244 insertions(+), 265 deletions(-)
 delete mode 100644 clang/test/CodeGenHLSL/GlobalDestructors.hlsl
 delete mode 100644 clang/test/CodeGenHLSL/convergence/global_array.hlsl
 create mode 100644 clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl
 delete mode 100644 clang/test/CodeGenHLSL/inline-constructors.hlsl

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 48fd12efdcafe..8f2704ae790ca 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -759,19 +759,29 @@ class CXXRecordDecl : public RecordDecl {
            needsImplicitDefaultConstructor();
   }
 
+  /// Whether this class has any user-declared or user-provided special member.
+  bool hasUserProvidedSpecialMembers() const {
+    return data().UserDeclaredSpecialMembers &
+      (SMF_MoveConstructor | SMF_MoveAssignment | SMF_Destructor |
+       SMF_CopyAssignment | SMF_CopyConstructor) ||
+      data().UserDeclaredConstructor ||
+      data().UserProvidedDefaultConstructor;
+  }
+
   /// Determine if we need to declare a default constructor for
   /// this class.
   ///
   /// This value is used for lazy creation of default constructors.
   bool needsImplicitDefaultConstructor() const {
-    return (!data().UserDeclaredConstructor &&
+    return ((!data().UserDeclaredConstructor &&
             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
             (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
            // FIXME: Proposed fix to core wording issue: if a class inherits
            // a default constructor and doesn't explicitly declare one, one
            // is declared implicitly.
            (data().HasInheritedDefaultConstructor &&
-            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor));
+            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
+      (!getLangOpts().HLSL || (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) || hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether this class has any user-declared constructors.
@@ -797,7 +807,8 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// constructor to be lazily declared.
   bool needsImplicitCopyConstructor() const {
-    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor);
+    return !(data().DeclaredSpecialMembers & SMF_CopyConstructor) &&
+      (!getLangOpts().HLSL || isLambda() || hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -894,7 +905,9 @@ class CXXRecordDecl : public RecordDecl {
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
            !hasUserDeclaredMoveAssignment() &&
-           !hasUserDeclaredDestructor();
+           !hasUserDeclaredDestructor() &&
+      (!getLangOpts().HLSL || isLambda() || hasUserDeclaredConstructor()
+       || hasUserProvidedDefaultConstructor());
   }
 
   /// Determine whether we need to eagerly declare a defaulted move
@@ -923,7 +936,8 @@ class CXXRecordDecl : public RecordDecl {
   /// Determine whether this class needs an implicit copy
   /// assignment operator to be lazily declared.
   bool needsImplicitCopyAssignment() const {
-    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment);
+    return !(data().DeclaredSpecialMembers & SMF_CopyAssignment) &&
+      (!getLangOpts().HLSL || isLambda() || hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -986,7 +1000,8 @@ class CXXRecordDecl : public RecordDecl {
            !hasUserDeclaredCopyAssignment() &&
            !hasUserDeclaredMoveConstructor() &&
            !hasUserDeclaredDestructor() &&
-           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable());
+           (!isLambda() || lambdaIsDefaultConstructibleAndAssignable()) &&
+      (!getLangOpts().HLSL || (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) || hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a move assignment
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index db1e3630435d0..4e2f4a8dafb70 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13533,6 +13533,7 @@ def err_hlsl_builtin_resource_coordinate_dimension_mismatch : Error<
   "builtin %0 resource coordinate dimension mismatch: expected %1, found %2">;
 
 // HLSL Diagnostics
+def err_hlsl_cstor_dstor : Error<"HLSL doesn't support constructors or destructors">;
 def err_hlsl_langstd_unimplemented : Error<"support for HLSL language version %0 is incomplete">;
 def err_hlsl_attr_unsupported_in_stage : Error<"attribute %0 is unsupported in '%1' shaders, requires %select{|one of the following: }2%3">;
 def err_hlsl_attr_invalid_type : Error<
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 8b5ffde1b73f3..49f35f254403e 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1527,8 +1527,9 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
     if (D.getInit() && (Ty->isArrayType() || Ty->isRecordType()) &&
         (D.isConstexpr() ||
          ((Ty.isPODType(getContext()) ||
-           getContext().getBaseElementType(Ty)->isObjCObjectPointerType()) &&
-          D.getInit()->isConstantInitializer(getContext(), false)))) {
+	   getContext().getBaseElementType(Ty)->isObjCObjectPointerType() ||
+	   getLangOpts().HLSL) &&
+	    D.getInit()->isConstantInitializer(getContext(), false)))) {
 
       // If the variable's a const type, and it's neither an NRVO
       // candidate nor a __block variable and has no mutable members,
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 4e6f853890c83..76effd217467d 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -978,7 +978,7 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
       llvm_unreachable("Not handled yet");
     } else {
       llvm::Type *ParamType =
-          Param.hasByValAttr() ? Param.getParamByValType() : Param.getType();
+	Param.hasByValAttr() ? Param.getParamByValType() : PD->getType()->isRecordType() ? CGM.getTypes().ConvertType(PD->getType()) : Param.getType();
       auto AttrBegin = PD->specific_attr_begin<HLSLAppliedSemanticAttr>();
       auto AttrEnd = PD->specific_attr_end<HLSLAppliedSemanticAttr>();
       auto Result =
@@ -986,8 +986,8 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
       SemanticValue = Result.first;
       if (!SemanticValue)
         return;
-      if (Param.hasByValAttr()) {
-        llvm::Value *Var = B.CreateAlloca(Param.getParamByValType());
+      if (Param.hasByValAttr() || PD->getType()->isRecordType()) {
+        llvm::Value *Var = B.CreateAlloca(ParamType);
         B.CreateStore(SemanticValue, Var);
         SemanticValue = Var;
       }
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 330e5ec699790..09c2e36512265 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -1939,9 +1939,11 @@ TryCastResult TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr,
   // There is no other way that works.
   // On the other hand, if we're checking a C-style cast, we've still got
   // the reinterpret_cast way.
+  // If HLSL init list flattening failed, there is no fallback either.
   bool CStyle = (CCK == CheckedConversionKind::CStyleCast ||
                  CCK == CheckedConversionKind::FunctionalCast);
-  if (InitSeq.Failed() && (CStyle || !DestType->isReferenceType()))
+  if (InitSeq.Failed() && (CStyle || !DestType->isReferenceType()) &&
+      InitSeq.getFailureKind() != InitializationSequence::FK_HLSLInitListFlatteningFailed)
     return TC_NotApplicable;
 
   ExprResult Result = InitSeq.Perform(Self, Entity, InitKind, SrcExprRaw);
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 2951fd09294d8..bf603901a5150 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14751,6 +14751,13 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
       = InitializationKind::CreateDefault(Var->getLocation());
 
     InitializationSequence InitSeq(*this, Entity, Kind, {});
+
+    // In HLSL, don't default-initialize user-defined structs. The sequence
+    // must have failed because there was no valid default constructor.
+    if (InitSeq.Failed() && getLangOpts().HLSL &&
+	InitSeq.getFailureKind() == InitializationSequence::FK_ConstructorOverloadFailed)
+     return;
+    
     ExprResult Init = InitSeq.Perform(*this, Entity, Kind, {});
 
     if (Init.get()) {
@@ -14758,6 +14765,7 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
       // This is important for template substitution.
       Var->setInitStyle(VarDecl::CallInit);
     } else if (Init.isInvalid()) {
+      
       // If default-init fails, attach a recovery-expr initializer to track
       // that initialization was attempted and failed.
       auto RecoveryExpr =
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 441df43d3d184..d3f2eb578a227 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -3507,6 +3507,18 @@ Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D,
     }
   }
 
+  // HLSL prohibits constructors and destructors.
+  if (getLangOpts().HLSL) {
+    switch (Name.getNameKind()) {
+    case DeclarationName::CXXConstructorName:
+    case DeclarationName::CXXDestructorName:
+      Diag(Loc, diag::err_hlsl_cstor_dstor);
+      return nullptr;
+    default:
+      break;
+    }
+  }
+
   // C++ 9.2p6: A member shall not be declared to have automatic storage
   // duration (auto, register) or with the extern storage-class-specifier.
   // C++ 7.1.1p8: The mutable specifier can be applied only to names of class
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cbbb4f791ee80..5124f64d3e245 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10301,7 +10301,7 @@ AssignConvertType Sema::CheckSingleAssignmentConstraints(QualType LHSType,
       return AssignConvertType::Compatible;
     }
 
-    if (ConvertRHS)
+    if (ConvertRHS) // && (!getLangOpts().HLSL || Context.getCanonicalType(RHS.get()->getType()) != Context.getCanonicalType(LHSType)))
       RHS = ImpCastExprToType(E, Ty, Kind);
   }
 
@@ -15442,6 +15442,16 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
     InitializedEntity Entity =
         InitializedEntity::InitializeTemporary(LHSExpr->getType());
     InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
+
+    // If this is HLSL and LHS is a record we transform the init list
+    if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
+      InitListExpr *ILE = cast<InitListExpr>(RHSExpr);
+      if (!HLSL().transformInitList(Entity, ILE))
+	InitSeq.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed);
+      else
+	RHSExpr = ILE;
+    }
+
     ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr);
     if (Init.isInvalid())
       return Init;
@@ -15490,6 +15500,26 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
 
   switch (Opc) {
   case BO_Assign:
+    // If this is HLSL try to perform aggregate initialization.
+    if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
+      ResultTy = LHSExpr->getType();
+      InitListExpr *ILE = new (Context) InitListExpr(getASTContext(), RHSExpr->getBeginLoc(), {RHSExpr}, RHSExpr->getEndLoc());
+      ILE->setType(getASTContext().VoidTy);
+      InitializationKind Kind = InitializationKind::CreateDirectList(RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
+      InitializedEntity Entity =
+	InitializedEntity::InitializeTemporary(ResultTy);
+      RHSExpr = ILE;
+      InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
+      if (!HLSL().transformInitList(Entity, ILE))
+	InitSeq.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed);
+
+      ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr);
+      if (Init.isInvalid())
+	return Init;
+      RHS = Init.get();
+      break;
+    }
+
     ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType(), Opc);
     if (getLangOpts().CPlusPlus &&
         LHS.get()->getObjectKind() != OK_ObjCProperty) {
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index f7e005a40363c..1950bd4ab4a0c 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1602,6 +1602,11 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
         Context, Ty.getNonReferenceType(), TInfo, LParenOrBraceLoc, Exprs,
         RParenOrBraceLoc, ListInitialization);
 
+  // HLSL doesn't support constructors or C++ functional casts for structs.
+  if (getLangOpts().HLSL && Ty->isRecordType())
+    return ExprError(Diag(TyBeginLoc, diag::err_ovl_no_viable_function_in_init)
+		     << Ty << FullRange);
+
   // C++ [expr.type.conv]p1:
   // If the expression list is a parenthesized single expression, the type
   // conversion expression is equivalent (in definedness, and if defined in
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 3b7b12a884f43..892ec9ee6381a 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -5878,8 +5878,10 @@ bool SemaHLSL::transformInitList(const InitializedEntity &Entity,
   }
   size_t ExpectedSize = ILT.DestTypes.size();
   size_t ActualSize = ILT.ArgExprs.size();
-  if (ExpectedSize == 0 && ActualSize == 0)
+  if (ExpectedSize == 0 && ActualSize == 0) {
+    Init->resizeInits(Ctx, 0);
     return true;
+  }
 
   // Reject empty initializer if *any* incomplete array exists structurally
   if (ActualSize == 0 && containsIncompleteArrayType(Entity.getType())) {
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index ede2b9beef49b..c0ebbad0c0232 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -4837,6 +4837,16 @@ static void TryConstructorOrParenListInitialization(
   TryConstructorInitialization(S, Entity, Kind, Args, DestType, DestType,
                                Sequence, /*IsListInit=*/false, IsAggrListInit);
 
+  // In HLSL, try list initialization if constructor initialization failed.
+  if (S.getLangOpts().HLSL && Sequence.Failed()) {
+    InitListExpr *ILE = new (S.Context) InitListExpr(S.getASTContext(), Args.front()->getBeginLoc(), Args, Args.back()->getEndLoc());
+    ILE->setType(S.getASTContext().VoidTy);
+    Args[0] = ILE;
+    // Reset the sequence kind to normal before retrying.
+    Sequence.setSequenceKind(InitializationSequence::NormalSequence);
+    TryListInitialization(S, Entity, Kind, ILE, Sequence, /*TreatUnavailableAsInvalid=*/true);
+    return;
+  }
   //       * Otherwise, if no constructor is viable, the destination type
   //         is an aggregate class, and the initializer is a parenthesized
   //         expression-list, the object is initialized as follows. [...]
@@ -5970,7 +5980,7 @@ static void TryOrBuildParenListInitialization(
         return false;
 
       if (InitExpr)
-        *InitExpr = ER.get();
+	*InitExpr = ER.get();
       else
         InitExprs.push_back(ER.get());
     }
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index de53f6010a1b6..68a780382c0b0 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -3632,6 +3632,8 @@ CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class,
 
 DeclContext::lookup_result Sema::LookupConstructors(CXXRecordDecl *Class) {
   // If the implicit constructors have not yet been declared, do so now.
+  // HLSL doesn't support implicit constructors because constructors for
+  // user-defined classes are not supported.
   if (CanDeclareSpecialMemberFunction(Class)) {
     runWithSufficientStackSpace(Class->getLocation(), [&] {
       if (Class->needsImplicitDefaultConstructor())
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 1ca340e8b72c7..d2c73d44bb1b1 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -15803,13 +15803,14 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
       // For class as left operand for assignment or compound assignment
       // operator do not fall through to handling in built-in, but report that
       // no overloaded assignment operator found
+      // Unless this is HLSL, in which case fall through to built-in handling.
       ExprResult Result = ExprError();
       StringRef OpcStr = BinaryOperator::getOpcodeStr(Opc);
       auto Cands = CandidateSet.CompleteCandidates(*this, OCD_AllCandidates,
                                                    Args, OpLoc);
       DeferDiagsRAII DDR(*this,
                          CandidateSet.shouldDeferDiags(*this, Args, OpLoc));
-      if (Args[0]->getType()->isRecordType() &&
+      if (Args[0]->getType()->isRecordType() && !getLangOpts().HLSL && 
           Opc >= BO_Assign && Opc <= BO_OrAssign) {
         Diag(OpLoc,  diag::err_ovl_no_viable_oper)
              << BinaryOperator::getOpcodeStr(Opc)
@@ -15819,6 +15820,21 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
             << Args[0]->getType()
             << Args[0]->getSourceRange() << Args[1]->getSourceRange();
         }
+      } else if (getLangOpts().HLSL) {
+	// If this is HLSL fall back to builtin operation
+
+	// This is an erroneous use of an operator which can be overloaded by
+        // a non-member function. Check for non-member operators which were
+        // defined too late to be candidates.
+        if (DiagnoseTwoPhaseOperatorLookup(*this, Op, OpLoc, Args))
+          // FIXME: Recover by calling the found function.
+          return ExprError();
+
+	Result = CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
+
+	if (!Result.isInvalid())
+	  return Result;
+	
       } else {
         // This is an erroneous use of an operator which can be overloaded by
         // a non-member function. Check for non-member operators which were
@@ -15830,6 +15846,11 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
         // No viable function; try to create a built-in operation, which will
         // produce an error. Then, show the non-viable candidates.
         Result = CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
+
+	// If this was HLSL it might not have produced an error.
+	if (getLangOpts().HLSL && !Result.isInvalid())
+	  return Result;
+	  
       }
       assert(Result.isInvalid() &&
              "C++ binary operator overloading is missing candidates!");
diff --git a/clang/test/AST/HLSL/cbuffer.hlsl b/clang/test/AST/HLSL/cbuffer.hlsl
index 487261af19133..087b68ab16c6d 100644
--- a/clang/test/AST/HLSL/cbuffer.hlsl
+++ b/clang/test/AST/HLSL/cbuffer.hlsl
@@ -151,9 +151,9 @@ cbuffer CB {
   void f() {}
   // CHECK: VarDecl {{.*}} SV 'hlsl_private float' static
   static float SV;
-  // CHECK: VarDecl {{.*}} s7 'EmptyStruct' callinit
+  // CHECK: VarDecl {{.*}} s7 'EmptyStruct'
   EmptyStruct s7;
-  // CHECK: VarDecl {{.*}} Buf 'RWBuffer<float>':'hlsl::RWBuffer<float>' callinit
+  // CHECK: VarDecl {{.*}} Buf 'RWBuffer<float>':'hlsl::RWBuffer<float>'
   RWBuffer<float> Buf;
   // CHECK: VarDecl {{.*}} ea 'EmptyArrayTypedef':'float[10][0]'
   EmptyArrayTypedef ea;
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
deleted file mode 100644
index 9d8c2e65a9598..0000000000000
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ /dev/null
@@ -1,94 +0,0 @@
-// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE-SPIRV,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE-DXIL,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE-DXIL,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-
-// Tests that constructors and destructors are appropriately generated for globals
-// and that their calls are inlined when AlwaysInline is run
-// but global variables are retained for the library profiles
-
-// Make sure global variable for ctors/dtors exist for lib profile.
-// LIB:@llvm.global_ctors
-// LIB:@llvm.global_dtors
-// Make sure global variable for ctors/dtors removed for compute profile.
-// CS-NOT:@llvm.global_ctors
-// CS-NOT:@llvm.global_dtors
-
-struct Tail {
-  Tail() {
-    add(1);
-  }
-
-  ~Tail() {
-    add(-1);
-  }
-
-  void add(int V) {
-    static int Count = 0;
-    Count += V;
-  }
-};
-
-struct Pupper {
-  static int Count;
-
-  Pupper() {
-    Count += 1; // :)
-  }
-
-  ~Pupper() {
-    Count -= 1; // :(
-  }
-} GlobalPup;
-
-void Wag() {
-  static Tail T;
-  T.add(0);
-}
-
-int Pupper::Count = 0;
-
-[numthreads(1,1,1)]
-[shader("compute")]
-void main(unsigned GI : SV_GroupIndex) {
-  Wag();
-}
-
-// CHECK:      define void @main()
-// CHECK-NEXT: entry:
-// Verify destructor is emitted
-// NOINLINE-DXIL-NEXT:   call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
-// NOINLINE-DXIL-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-// NOINLINE-DXIL-NEXT:   call void @_Z4mainj(i32 %0)
-// NOINLINE-DXIL-NEXT:   call void @_GLOBAL__D_a()
-// NOINLINE-DXIL-NEXT:   ret void
-
-// NOINLINE-SPIRV-NEXT:   %0 = call token @llvm.experimental.convergence.entry()
-// NOINLINE-SPIRV-NEXT:   call spir_func void @_GLOBAL__sub_I_GlobalDestructors.hlsl() [ "convergencectrl"(token %0) ]
-// NOINLINE-SPIRV-NEXT:   %1 = call i32 @llvm.spv.flattened.thread.id.in.group()
-// NOINLINE-SPIRV-NEXT:   call spir_func void @_Z4mainj(i32 %1) [ "convergencectrl"(token %0) ]
-// NOINLINE-SPIRV-NEXT:   call spir_func void @_GLOBAL__D_a() [ "convergencectrl"(token %0) ]
-// NOINLINE-SPIRV-NEXT:   ret void
-
-// Verify inlining leaves only calls to "llvm." intrinsics
-// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
-// INLINE:   ret void
-
-// This is really just a sanity check I needed for myself to verify that
-// function scope static variables also get destroyed properly.
-
-// NOINLINE-DXIL:       define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
-// NOINLINE-DXIL-NEXT:  entry:
-// NOINLINE-DXIL-NEXT:    call void @_ZN4TailD1Ev(ptr @_ZZ3WagvE1T)
-// NOINLINE-DXIL-NEXT:    call void @_ZN6PupperD1Ev(ptr @GlobalPup)
-// NOINLINE-DXIL-NEXT:    ret void
-
-// NOINLINE-SPIRV:      define internal spir_func void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
-// NOINLINE-SPIRV-NEXT: entry:
-// NOINLINE-SPIRV-NEXT:   %0 = call token @llvm.experimental.convergence.entry()
-// NOINLINE-SPIRV-NEXT:   call spir_func void @_ZN4TailD1Ev(ptr addrspacecast (ptr addrspace(10) @_ZZ3WagvE1T to ptr)) [ "convergencectrl"(token %0) ]
-// NOINLINE-SPIRV-NEXT:   call spir_func void @_ZN6PupperD1Ev(ptr @GlobalPup) [ "convergencectrl"(token %0) ]
-// NOINLINE-SPIRV-NEXT:   ret void
-
-// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/convergence/global_array.hlsl b/clang/test/CodeGenHLSL/convergence/global_array.hlsl
deleted file mode 100644
index c594e3a3e62ae..0000000000000
--- a/clang/test/CodeGenHLSL/convergence/global_array.hlsl
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
-
-// CHECK: define internal spir_func void @__cxx_global_var_init()
-// CHECK: [[entry_token:%.*]] = call token @llvm.experimental.convergence.entry()
-// CHECK: br label %[[loop_entry:.*]]
-
-// CHECK: [[loop_entry]]:
-// CHECK: [[loop_token:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[entry_token]]) ]
-// CHECK: call spir_func void {{.*}} [ "convergencectrl"(token [[loop_token]]) ]
-// CHECK: br i1 {{%.*}} label {{%.*}} label %[[loop_entry]]
-
-struct S {
-    int i;
-    S() { i = 10; }
-};
-
-static S s[2];
-
-[numthreads(4,1,1)]
-void main() {
-}
-
diff --git a/clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl b/clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl
new file mode 100644
index 0000000000000..a43381db4d3b2
--- /dev/null
+++ b/clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -std=hlsl202x -emit-llvm -disable-llvm-passes -hlsl-entry main -o - %s | FileCheck %s
+
+struct Shared {
+  int A;
+  float F;
+  double Arr[1];
+};
+
+// CHECK: [[SharedData:@.*]] = external hidden addrspace(3) global %struct.Shared, align 1
+groupshared Shared SharedData;
+// CHECK: [[SharedData2:@.*]] = external hidden addrspace(3) global %struct.Shared, align 1
+groupshared Shared SharedData2;
+
+// CHECK-LABEL: define hidden void @_Z3fn1RU3AS36Shared(ptr addrspace(3) noundef align 1 dereferenceable(16) %Sh)
+// CHECK: [[ShAddr:%.*]] = alloca ptr addrspace(3), align 4
+// CHECK-NEXT: [[DAddr:%.*]] = alloca double, align 8
+// CHECK-NEXT: store ptr addrspace(3) %Sh, ptr [[ShAddr]], align 4
+// CHECK-NEXT: [[Sh:%.*]] = load ptr addrspace(3), ptr [[ShAddr]], align 4
+// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw %struct.Shared, ptr addrspace(3) [[Sh]], i32 0, i32 0
+// CHECK-NEXT: store i32 10, ptr addrspace(3) [[A]], align 1
+// CHECK-NEXT: [[Sh2:%.*]] = load ptr addrspace(3), ptr [[ShAddr]], align 4
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw %struct.Shared, ptr addrspace(3) [[Sh2]], i32 0, i32 1
+// CHECK-NEXT: store float 0x40263851E0000000, ptr addrspace(3) [[F]], align 1
+// CHECK-NEXT: store double 1.000000e+01, ptr [[DAddr]], align 8
+// CHECK-NEXT: [[D:%.*]] = load double, ptr [[DAddr]], align 8
+// CHECK-NEXT: [[Sh3:%.*]] = load ptr addrspace(3), ptr [[ShAddr]], align 4
+// CHECK-NEXT: [[Arr:%.*]] = getelementptr inbounds nuw %struct.Shared, ptr addrspace(3) [[Sh3]], i32 0, i32 2
+// CHECK-NEXT: [[ArrIdx:%.*]] = getelementptr inbounds [1 x double], ptr addrspace(3) [[Arr]], i32 0, i32 1
+// CHECK-NEXT: store double [[D]], ptr addrspace(3) [[ArrIdx]], align 1
+// CHECK-NEXT: ret void
+void fn1(groupshared Shared Sh) {
+  Sh.A = 10;
+  Sh.F = 11.11;
+  double D = 10.0;
+  Sh.Arr[1] = D;
+}
+
+
+// CHECK-LABEL: define internal void @_Z4mainDv3_j(<3 x i32> noundef %TID)
+[numthreads(4, 1, 1)]
+void main(uint3 TID : SV_GroupThreadID) {
+// CHECK: [[SAddr:%.*]] = alloca %struct.Shared, align 1
+// CHECK: call void @_Z3fn1RU3AS36Shared(ptr addrspace(3) noundef align 1 dereferenceable(16) [[SharedData]]) #3
+  fn1(SharedData);
+
+// CHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw %struct.Shared, ptr [[SAddr]], i32 0, i32 0
+// CHECK-NEXT: [[SD:%.*]] = load i32, ptr addrspace(3) [[SharedData]], align 1
+// CHECK-NEXT: store i32 [[SD]], ptr [[A]], align 1
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw %struct.Shared, ptr [[SAddr]], i32 0, i32 1
+// CHECK-NEXT: [[F2:%.*]] = load float, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData]], i32 4), align 1
+// CHECK-NEXT: store float [[F2]], ptr [[F]], align 1
+// CHECK-NEXT: [[Arr:%.*]] = getelementptr inbounds nuw %struct.Shared, ptr [[SAddr]], i32 0, i32 2
+// CHECK-NEXT: [[Arr2:%.*]] = load double, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData]], i32 8), align 1
+// CHECK-NEXT: store double [[Arr2]], ptr [[Arr]], align 1
+  Shared S = SharedData;
+
+// CHECK-NEXT: [[ASD:%.*]] = load i32, ptr addrspace(3) [[SharedData]], align 1
+// CHECK-NEXT: store i32 [[ASD]], ptr addrspace(3) [[SharedData2]], align 1
+// CHECK-NEXT: [[FSD:%.*]] = load float, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData]], i32 4), align 1
+// CHECK-NEXT: store float [[FSD]], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData2]], i32 4), align 1
+// CHECK-NEXT: [[ArrSD:%.*]] = load double, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData]], i32 8), align 1
+// CHECK-NEXT: store double [[ArrSD]], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData2]], i32 8), align 1
+  SharedData2 = SharedData;
+}
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
deleted file mode 100644
index b0d5a783fb372..0000000000000
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ /dev/null
@@ -1,76 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-
-// Tests that implicit constructor calls for user classes will always be inlined.
-
-struct Weed {
-  Weed() {Count += 1;}
-  [[maybe_unused]] void pull() {Count--;}
-  static int weedCount() { return Count; }
-private:
-  static int Count;
-
-} YardWeeds;
-
-int Weed::Count = 1; // It begins. . .
-
-struct Kitty {
-  unsigned burrsInFur;
-
-  Kitty() {
-    burrsInFur = 0;
-  }
-
-  void wanderInYard(int hours) {
-    burrsInFur = hours*Weed::weedCount()/8;
-  }
-
-  void lick() {
-    if(burrsInFur) {
-      burrsInFur--;
-      Weed w;
-    }
-  }
-
-} Nion;
-
-void NionsDay(int hours) {
-  static Kitty Nion;
-  Nion.wanderInYard(hours);
-  while(Nion.burrsInFur) Nion.lick();
-}
-
-// CHECK:      define void @main()
-// CHECK-NEXT: entry:
-// Verify constructor is emitted
-// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
-// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-// NOINLINE-NEXT: call void @_Z4mainj(i32 %0)
-// Verify inlining leaves only calls to "llvm." intrinsics
-// INLINE-NOT:    call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
-// CHECK:         ret void
-[shader("compute")]
-[numthreads(1,1,1)]
-void main(unsigned GI : SV_GroupIndex) {
-  NionsDay(10);
-}
-
-
-// CHECK:      define void @rainyMain()
-// CHECK-NEXT: entry:
-// Verify constructor is emitted
-// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_inline_constructors.hlsl()
-// NOINLINE-NEXT:   call void @_Z9rainyMainv()
-// Verify inlining leaves only calls to "llvm." intrinsics
-// INLINE-NOT:      call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
-// CHECK:           ret void
-[shader("compute")]
-[numthreads(1,1,1)]
-void rainyMain() {
-  NionsDay(1);
-}
-
diff --git a/clang/test/CodeGenHLSL/this-assignment.hlsl b/clang/test/CodeGenHLSL/this-assignment.hlsl
index efccc96499242..6efae95fdbb75 100644
--- a/clang/test/CodeGenHLSL/this-assignment.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment.hlsl
@@ -6,13 +6,14 @@ struct Pair {
 
   int getFirst() {
     Pair Another = {5, 10};
+    Pair B = Another;
     this = Another;
-	  return this.First;
+    return this.First;
   }
 
   // In HLSL 202x, this is a move assignment rather than a copy.
   int getSecond() {
-    this = Pair();
+//    this = Pair();
     return Second;
   }
 
diff --git a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
index fbb47bd2e7d39..8b5b763f38788 100644
--- a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
@@ -16,7 +16,7 @@ struct R {
 // Can't cast a union
 export void cantCast2() {
   R r = (R)1;
-  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'R'}}
+  // expected-error at -1 {{too few initializers in list for type 'R' (expected 2 but found 1)}}
 }
 
 RWBuffer<float4> Buf;
@@ -24,7 +24,7 @@ RWBuffer<float4> Buf;
 // Can't cast an intangible type
 export void cantCast3() {
   Buf = (RWBuffer<float4>)1;
-  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'RWBuffer<float4>' (aka 'RWBuffer<vector<float, 4>>')}}
+  // expected-error at -1 {{no viable conversion from 'int' to 'hlsl::RWBuffer<vector<float, 4>>'}}
 }
 
 export void cantCast4() {
@@ -39,5 +39,5 @@ struct X {
 
 export void cantCast5() {
   X x = (X)1;
-  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'X'}}
+  // expected-error at -1 {{too few initializers in list for type 'X' (expected 2 but found 1)}}
 }
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
index d9f50e9b0307f..15a3186931cb6 100644
--- a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
@@ -8,9 +8,6 @@ export void cantCast() {
 }
 
 struct R {
-// expected-note at -1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const R' for 1st argument}}
-// expected-note at -2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'R' for 1st argument}}
-// expected-note at -3 {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}}
   int A;
   union {
     float F;
@@ -21,7 +18,8 @@ struct R {
 export void cantCast4() {
   int2 A = {1,2};
   R r = R(A);
-  // expected-error at -1 {{no matching conversion for functional-style cast from 'int2' (aka 'vector<int, 2>') to 'R'}}
+  // expected-error at -1 {{no viable conversion from 'int' to 'R}}
+  // expected-error at -2 {{no viable conversion from 'float' to 'R}}
   R r2;
   r2.A = 1;
   r2.F = 2.0;
diff --git a/clang/test/SemaHLSL/Language/InitLists.hlsl b/clang/test/SemaHLSL/Language/InitLists.hlsl
index c31c0fde33f30..c3f2c4af09065 100644
--- a/clang/test/SemaHLSL/Language/InitLists.hlsl
+++ b/clang/test/SemaHLSL/Language/InitLists.hlsl
@@ -100,15 +100,12 @@ void Errs() {
 
 struct R {
   int A;
-  union { // #anon
+  union {
     float F;
     int4 G;
   };
 };
 
-// expected-note@#anon{{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to}}
-// expected-note@#anon{{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to}}
-
 void Err2(RWBuffer<float4> B) {
   ContainsResource RS1 = {1, B};
   ContainsResource RS2 = (1.xx); // expected-error{{no viable conversion from 'vector<int, 2>' (vector of 2 'int' values) to 'ContainsResource'}}
@@ -118,8 +115,5 @@ void Err2(RWBuffer<float4> B) {
   R r = {1,2}; // expected-error{{no viable conversion from 'int' to 'R::(anonymous union at}}
 }
 
-// expected-note@#ContainsResource{{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'vector<int, 2>' (vector of 2 'int' values) to 'const ContainsResource &' for 1st argument}}
-// expected-note@#ContainsResource{{candidate constructor (the implicit move constructor) not viable: no known conversion from 'vector<int, 2>' (vector of 2 'int' values) to 'ContainsResource &&' for 1st argument}}
-
 // This note refers to the RWBuffer copy constructor that do not have a source locations
 // expected-note@*{{candidate constructor not viable}}
diff --git a/clang/test/SemaHLSL/Types/AggregateSplatConstantExpr.hlsl b/clang/test/SemaHLSL/Types/AggregateSplatConstantExpr.hlsl
index 630acd8297642..de08652c0e2ec 100644
--- a/clang/test/SemaHLSL/Types/AggregateSplatConstantExpr.hlsl
+++ b/clang/test/SemaHLSL/Types/AggregateSplatConstantExpr.hlsl
@@ -56,34 +56,4 @@ export void fn() {
   _Static_assert(A7[0][1] == 1, "Woo!");
   _Static_assert(A7[1][0] == 1, "Woo!");
   _Static_assert(A7[1][1] == 1, "Woo!");
-
-  // result type struct
-  // splat from a scalar
-  constexpr double D = 97.6789;
-  constexpr R SR = (R)(D + 3.0);
-  _Static_assert(SR.D == 100.6789, "Woo!");
-  _Static_assert(SR.U[0] == 100, "Woo!");
-  _Static_assert(SR.U[1] == 100, "Woo!");
-  _Static_assert(SR.I == 4, "Woo!");
-  _Static_assert(SR.I2 == 4, "Woo!");
-  _Static_assert(SR.G == 100, "Woo!");
-  _Static_assert(SR.F == 100.6789, "Woo!");
-
-  // splat from a vector of size 1
-  constexpr float1 A100 = {1000.1111};
-  constexpr B2 SB2 = (B2)A100;
-  _Static_assert(SB2.A == 1000.1111, "Woo!");
-  _Static_assert(SB2.B == 1000.1111, "Woo!");
-  _Static_assert(SB2.C == 1000, "Woo!");
-  _Static_assert(SB2.D == 1000, "Woo!");
-  _Static_assert(SB2.BB == true, "Woo!");
-
-  // splat from a bool to an int and float etc
-  constexpr bool B = true;
-  constexpr B2 SB3 = (B2)B;
-  _Static_assert(SB3.A == 1.0, "Woo!");
-  _Static_assert(SB3.B == 1.0, "Woo!");
-  _Static_assert(SB3.C == 1, "Woo!");
-  _Static_assert(SB3.D == 1, "Woo!");
-  _Static_assert(SB3.BB == true, "Woo!");
 }
diff --git a/clang/test/SemaHLSL/Types/ElementwiseCastConstantExpr.hlsl b/clang/test/SemaHLSL/Types/ElementwiseCastConstantExpr.hlsl
index c9963c36ce23a..8ca63fcf70edc 100644
--- a/clang/test/SemaHLSL/Types/ElementwiseCastConstantExpr.hlsl
+++ b/clang/test/SemaHLSL/Types/ElementwiseCastConstantExpr.hlsl
@@ -70,15 +70,6 @@ export void fn() {
   _Static_assert(FArr[1] == 200.11, "Woo!");
   _Static_assert(FArr[2] == 300.11, "Woo!");
 
-  // result type struct from struct
-  constexpr B2 SB2 = {5.5, 6.5, 1000, 5000, false};
-  constexpr Base SB = (Base)SB2;
-  _Static_assert(SB.D == 5.5, "Woo!");
-  _Static_assert(SB.U[0] == 6, "Woo!");
-  _Static_assert(SB.U[1] == 1000, "Woo!");
-  _Static_assert(SB.I == 8, "Woo!");
-  _Static_assert(SB.I2 == 0, "Woo!");
-
   // Make sure we read bitfields correctly
   constexpr Base BB = {222.22, {100, 200}, -2, 7};
   constexpr int Arr3[5] = (int[5])BB;
diff --git a/clang/test/SemaHLSL/Types/InitListConstantExpr.hlsl b/clang/test/SemaHLSL/Types/InitListConstantExpr.hlsl
index c2797f5c8d94e..acf73823e3164 100644
--- a/clang/test/SemaHLSL/Types/InitListConstantExpr.hlsl
+++ b/clang/test/SemaHLSL/Types/InitListConstantExpr.hlsl
@@ -47,4 +47,47 @@ export void fn() {
   _Static_assert(SB3.U[1] == 1000, "Woo!");
   _Static_assert(SB3.I == 8, "Woo!");
   _Static_assert(SB3.I2 == 0, "Woo!");
+
+  // Test taken from ElementwiseCastConstantExpr.hlsl
+  // because it crashes for the same reason as the other tests in this file
+  // result type struct from struct
+  constexpr B2 SB2 = {5.5, 6.5, 1000, 5000, false};
+  constexpr Base SB = (Base)SB2;
+  _Static_assert(SB.D == 5.5, "Woo!");
+  _Static_assert(SB.U[0] == 6, "Woo!");
+  _Static_assert(SB.U[1] == 1000, "Woo!");
+  _Static_assert(SB.I == 8, "Woo!");
+  _Static_assert(SB.I2 == 0, "Woo!");
+
+  // The below tests were taken from AggregateSplatConstantExpr.hlsl
+  // because they crash for the same reason as the other tests in this file
+  // result type struct
+  // splat from a scalar
+  constexpr double D = 97.6789;
+  constexpr R SR = (R)(D + 3.0);
+  _Static_assert(SR.D == 100.6789, "Woo!");
+  _Static_assert(SR.U[0] == 100, "Woo!");
+  _Static_assert(SR.U[1] == 100, "Woo!");
+  _Static_assert(SR.I == 4, "Woo!");
+  _Static_assert(SR.I2 == 4, "Woo!");
+  _Static_assert(SR.G == 100, "Woo!");
+  _Static_assert(SR.F == 100.6789, "Woo!");
+
+  // splat from a vector of size 1
+  constexpr float1 A100 = {1000.1111};
+  constexpr B2 SB2 = (B2)A100;
+  _Static_assert(SB2.A == 1000.1111, "Woo!");
+  _Static_assert(SB2.B == 1000.1111, "Woo!");
+  _Static_assert(SB2.C == 1000, "Woo!");
+  _Static_assert(SB2.D == 1000, "Woo!");
+  _Static_assert(SB2.BB == true, "Woo!");
+
+  // splat from a bool to an int and float etc
+  constexpr bool B = true;
+  constexpr B2 SB3 = (B2)B;
+  _Static_assert(SB3.A == 1.0, "Woo!");
+  _Static_assert(SB3.B == 1.0, "Woo!");
+  _Static_assert(SB3.C == 1, "Woo!");
+  _Static_assert(SB3.D == 1, "Woo!");
+  _Static_assert(SB3.BB == true, "Woo!");
 }
diff --git a/clang/test/SemaHLSL/prohibit_pointer.hlsl b/clang/test/SemaHLSL/prohibit_pointer.hlsl
index 76c017150f9d5..90f1160e1d593 100644
--- a/clang/test/SemaHLSL/prohibit_pointer.hlsl
+++ b/clang/test/SemaHLSL/prohibit_pointer.hlsl
@@ -68,6 +68,7 @@ struct Fish {
 
   // expected-note at +1 {{'->' applied to return value of the operator->() declared here}}
   Fins operator ->() {
+    // expected-error at +1 {{no matching constructor for initialization of 'Fins'}}
     return Fins();
   }
 };

>From aeb11bc367737d1a9098356e24cc66c0a992769a Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Wed, 1 Apr 2026 16:35:09 -0700
Subject: [PATCH 2/9] update tests

---
 .../BasicFeatures/AggregateSplatCast.hlsl     |  252 +++-
 .../BasicFeatures/ArrayElementwiseCast.hlsl   |    2 +-
 .../CodeGenHLSL/BasicFeatures/InitLists.hlsl  |  144 ++-
 .../BasicFeatures/MatrixElementTypeCast.hlsl  |   16 +-
 .../BasicFeatures/OutputArguments.hlsl        |   28 +-
 .../BasicFeatures/StructElementwiseCast.hlsl  | 1063 +++++++++++++----
 .../BasicFeatures/VectorElementwiseCast.hlsl  |   12 +-
 clang/test/CodeGenHLSL/BoolMatrix.hlsl        |    8 +-
 .../CodeGenHLSL/builtins/hlsl_resource_t.hlsl |    8 +-
 .../StructuredBuffers-subscripts.hlsl         |   17 +-
 .../semantics/semantic-struct-2-output.hlsl   |    4 +-
 .../CodeGenHLSL/this-assignment-overload.hlsl |   21 +-
 clang/test/CodeGenHLSL/this-assignment.hlsl   |   55 +-
 clang/test/SemaHLSL/GlobalConstructors.hlsl   |    1 +
 .../Language/ElementwiseCast-errors.hlsl      |    2 +-
 15 files changed, 1262 insertions(+), 371 deletions(-)

diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index abfea79f0a454..b234e815696f1 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -53,32 +53,73 @@ struct S {
 };
 
 // struct splats
-// CHECK-LABEL: define void {{.*}}call3
-// CHECK: [[AA:%.*]] = alloca i32, align 4
-// CHECK: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4
-// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[AA]], align 4
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
+// CHECK-LABEL: define void @_Z5call3i(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[REF_TMP3:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT:    store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X2]], align 1
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[X]], align 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[GEP4]], align 4
+// CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[TMP2]] to float
+// CHECK-NEXT:    store float [[CONV6]], ptr [[GEP5]], align 4
+// CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[Y7]], align 1
+// CHECK-NEXT:    store float [[TMP3]], ptr [[Y]], align 1
+// CHECK-NEXT:    ret void
+//
 export void call3(int A) {
   S s = (S)A;
 }
 
 // struct splat from vector of length 1
-// CHECK-LABEL: define void {{.*}}call5
-// CHECK: [[A:%.*]] = alloca <1 x i32>, align 4
-// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[A]], align 4
-// CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[A]], align 4
-// CHECK-NEXT: [[VL:%.*]] = extractelement <1 x i32> [[L]], i32 0
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
+// CHECK-LABEL: define void @_Z5call5v(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca <1 x i32>, align 4
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[REF_TMP3:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    store <1 x i32> splat (i32 1), ptr [[A]], align 4
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i32>, ptr [[A]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[CAST_VTRUNC]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[CAST_VTRUNC]] to float
+// CHECK-NEXT:    store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X2]], align 1
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[X]], align 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i32>, ptr [[A]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC4:%.*]] = extractelement <1 x i32> [[TMP2]], i32 0
+// CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[CAST_VTRUNC4]], ptr [[GEP5]], align 4
+// CHECK-NEXT:    [[CONV7:%.*]] = sitofp i32 [[CAST_VTRUNC4]] to float
+// CHECK-NEXT:    store float [[CONV7]], ptr [[GEP6]], align 4
+// CHECK-NEXT:    [[Y8:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[Y8]], align 1
+// CHECK-NEXT:    store float [[TMP3]], ptr [[Y]], align 1
+// CHECK-NEXT:    ret void
+//
 export void call5() {
   int1 A = {1};
   S s = (S)A;
@@ -101,17 +142,38 @@ export void call9() {
 }
 
 // struct splat from 1x1 matrix
-// CHECK-LABEL: define void {{.*}}call10
-// CHECK: [[M:%.*]] = alloca [1 x <1 x i32>], align 4
-// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[M]], align 4
-// CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[M]], align 4
-// CHECK-NEXT: [[ML:%.*]] = extractelement <1 x i32> [[L]], i32 0
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: store i32 [[ML]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[ML]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
+// CHECK-LABEL: define void @_Z6call10v(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca [1 x <1 x i32>], align 4
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[REF_TMP3:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    store <1 x i32> splat (i32 1), ptr [[M]], align 4
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i32>, ptr [[M]], align 4
+// CHECK-NEXT:    [[CAST_MTRUNC:%.*]] = extractelement <1 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[CAST_MTRUNC]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[CAST_MTRUNC]] to float
+// CHECK-NEXT:    store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X2]], align 1
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[X]], align 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i32>, ptr [[M]], align 4
+// CHECK-NEXT:    [[CAST_MTRUNC4:%.*]] = extractelement <1 x i32> [[TMP2]], i32 0
+// CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[CAST_MTRUNC4]], ptr [[GEP5]], align 4
+// CHECK-NEXT:    [[CONV7:%.*]] = sitofp i32 [[CAST_MTRUNC4]] to float
+// CHECK-NEXT:    store float [[CONV7]], ptr [[GEP6]], align 4
+// CHECK-NEXT:    [[Y8:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[Y8]], align 1
+// CHECK-NEXT:    store float [[TMP3]], ptr [[Y]], align 1
+// CHECK-NEXT:    ret void
+//
 export void call10() {
   int1x1 M = {1};
   S s = (S)M;
@@ -129,28 +191,110 @@ struct Derived : BFields {
 };
 
 // derived struct with bitfields splat from scalar
-// CHECK-LABEL: call6
-// CHECK: [[AAddr:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[D:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: store i32 %A, ptr [[AAddr]], align 4
-// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[AAddr]], align 4
-// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0
-// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
-// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 0
-// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 2
-// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[B]] to double
-// CHECK-NEXT: store double [[C]], ptr [[Gep1]], align 8
-// CHECK-NEXT: [[H:%.*]] = trunc i32 [[B]] to i24
-// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1
-// CHECK-NEXT: [[BFV:%.*]] = and i24 [[H]], 32767
-// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL]], -32768
-// CHECK-NEXT: [[BFS:%.*]] = or i24 [[BFC]], [[BFV]]
-// CHECK-NEXT: store i24 [[BFS]], ptr [[E]], align 1
-// CHECK-NEXT: [[C4:%.*]] = sitofp i32 [[B]] to float
-// CHECK-NEXT: store float [[C4]], ptr [[Gep2]], align 4
-// CHECK-NEXT: store i32 [[B]], ptr [[Gep3]], align 4
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define void @_Z5call6i(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[D:%.*]] = alloca [[STRUCT_DERIVED:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[REF_TMP7:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[REF_TMP25:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[REF_TMP38:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[DF:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS:%.*]], ptr [[D]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to double
+// CHECK-NEXT:    store double [[CONV]], ptr [[GEP1]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i24
+// CHECK-NEXT:    [[BF_LOAD:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT:    [[BF_VALUE:%.*]] = and i24 [[TMP1]], 32767
+// CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i24 [[BF_LOAD]], -32768
+// CHECK-NEXT:    [[BF_SET:%.*]] = or i24 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK-NEXT:    store i24 [[BF_SET]], ptr [[E]], align 1
+// CHECK-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT:    store float [[CONV4]], ptr [[GEP2]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[DF5:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[DF5]], align 1
+// CHECK-NEXT:    store double [[TMP2]], ptr [[DF]], align 1
+// CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[D]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP7]], i32 0, i32 0
+// CHECK-NEXT:    [[E9:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP8]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP7]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP7]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP12:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP7]], i32 0, i32 1
+// CHECK-NEXT:    [[CONV13:%.*]] = sitofp i32 [[TMP3]] to double
+// CHECK-NEXT:    store double [[CONV13]], ptr [[GEP10]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
+// CHECK-NEXT:    [[BF_LOAD14:%.*]] = load i24, ptr [[E9]], align 1
+// CHECK-NEXT:    [[BF_VALUE15:%.*]] = and i24 [[TMP4]], 32767
+// CHECK-NEXT:    [[BF_CLEAR16:%.*]] = and i24 [[BF_LOAD14]], -32768
+// CHECK-NEXT:    [[BF_SET17:%.*]] = or i24 [[BF_CLEAR16]], [[BF_VALUE15]]
+// CHECK-NEXT:    store i24 [[BF_SET17]], ptr [[E9]], align 1
+// CHECK-NEXT:    [[CONV18:%.*]] = sitofp i32 [[TMP3]] to float
+// CHECK-NEXT:    store float [[CONV18]], ptr [[GEP11]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP12]], align 4
+// CHECK-NEXT:    [[E19:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP7]], i32 0, i32 1
+// CHECK-NEXT:    [[BF_LOAD20:%.*]] = load i24, ptr [[E19]], align 1
+// CHECK-NEXT:    [[BF_SHL:%.*]] = shl i24 [[BF_LOAD20]], 9
+// CHECK-NEXT:    [[BF_ASHR:%.*]] = ashr i24 [[BF_SHL]], 9
+// CHECK-NEXT:    [[BF_CAST:%.*]] = sext i24 [[BF_ASHR]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[BF_CAST]] to i24
+// CHECK-NEXT:    [[BF_LOAD21:%.*]] = load i24, ptr [[E6]], align 1
+// CHECK-NEXT:    [[BF_VALUE22:%.*]] = and i24 [[TMP5]], 32767
+// CHECK-NEXT:    [[BF_CLEAR23:%.*]] = and i24 [[BF_LOAD21]], -32768
+// CHECK-NEXT:    [[BF_SET24:%.*]] = or i24 [[BF_CLEAR23]], [[BF_VALUE22]]
+// CHECK-NEXT:    store i24 [[BF_SET24]], ptr [[E6]], align 1
+// CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[D]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP26:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP25]], i32 0, i32 0
+// CHECK-NEXT:    [[E27:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP26]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP28:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP25]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP29:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP25]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP30:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP25]], i32 0, i32 1
+// CHECK-NEXT:    [[CONV31:%.*]] = sitofp i32 [[TMP6]] to double
+// CHECK-NEXT:    store double [[CONV31]], ptr [[GEP28]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i24
+// CHECK-NEXT:    [[BF_LOAD32:%.*]] = load i24, ptr [[E27]], align 1
+// CHECK-NEXT:    [[BF_VALUE33:%.*]] = and i24 [[TMP7]], 32767
+// CHECK-NEXT:    [[BF_CLEAR34:%.*]] = and i24 [[BF_LOAD32]], -32768
+// CHECK-NEXT:    [[BF_SET35:%.*]] = or i24 [[BF_CLEAR34]], [[BF_VALUE33]]
+// CHECK-NEXT:    store i24 [[BF_SET35]], ptr [[E27]], align 1
+// CHECK-NEXT:    [[CONV36:%.*]] = sitofp i32 [[TMP6]] to float
+// CHECK-NEXT:    store float [[CONV36]], ptr [[GEP29]], align 4
+// CHECK-NEXT:    store i32 [[TMP6]], ptr [[GEP30]], align 4
+// CHECK-NEXT:    [[F37:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP25]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F37]], align 1
+// CHECK-NEXT:    store float [[TMP8]], ptr [[F]], align 1
+// CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds nuw [[STRUCT_DERIVED]], ptr [[D]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP39:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP38]], i32 0, i32 0
+// CHECK-NEXT:    [[E40:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP39]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP41:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP38]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP42:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP38]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP43:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP38]], i32 0, i32 1
+// CHECK-NEXT:    [[CONV44:%.*]] = sitofp i32 [[TMP9]] to double
+// CHECK-NEXT:    store double [[CONV44]], ptr [[GEP41]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[TMP9]] to i24
+// CHECK-NEXT:    [[BF_LOAD45:%.*]] = load i24, ptr [[E40]], align 1
+// CHECK-NEXT:    [[BF_VALUE46:%.*]] = and i24 [[TMP10]], 32767
+// CHECK-NEXT:    [[BF_CLEAR47:%.*]] = and i24 [[BF_LOAD45]], -32768
+// CHECK-NEXT:    [[BF_SET48:%.*]] = or i24 [[BF_CLEAR47]], [[BF_VALUE46]]
+// CHECK-NEXT:    store i24 [[BF_SET48]], ptr [[E40]], align 1
+// CHECK-NEXT:    [[CONV49:%.*]] = sitofp i32 [[TMP9]] to float
+// CHECK-NEXT:    store float [[CONV49]], ptr [[GEP42]], align 4
+// CHECK-NEXT:    store i32 [[TMP9]], ptr [[GEP43]], align 4
+// CHECK-NEXT:    [[G50:%.*]] = getelementptr inbounds nuw [[STRUCT_DERIVED]], ptr [[REF_TMP38]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[G50]], align 1
+// CHECK-NEXT:    store i32 [[TMP11]], ptr [[G]], align 1
+// CHECK-NEXT:    ret void
+//
 export void call6(int A) {
   Derived D = (Derived)A;
 }
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index 740b80afdb609..8accc6e394f2c 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -158,7 +158,7 @@ struct Derived : BFields {
 // CHECK-LABEL: call8
 // CHECK: [[A:%.*]] = alloca [4 x i32], align 4
 // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 0
 // CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 1
 // CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 0, i32 2
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 9c42da8962c2d..5c4f1044f9ee6 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -66,6 +66,8 @@ struct UnnamedDerived : UnnamedOnly {};
 // CHECK-LABEL: define hidden void @_Z5case1v(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case1v.TF1, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -78,6 +80,8 @@ TwoFloats case1() {
 // CHECK-LABEL: define hidden void @_Z5case2v(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_RESULT]], ptr align 1 @__const._Z5case2v.TF2, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -90,7 +94,9 @@ TwoFloats case2() {
 // CHECK-LABEL: define hidden void @_Z5case3i(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[VAL:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
@@ -110,7 +116,9 @@ TwoFloats case3(int Val) {
 // CHECK-LABEL: define hidden void @_Z5case4Dv2_i(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 4
@@ -133,7 +141,9 @@ TwoFloats case4(int2 TwoVals) {
 // CHECK-LABEL: define hidden void @_Z5case5Dv2_i(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], <2 x i32> noundef [[TWOVALS:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TWOVALS_ADDR:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store <2 x i32> [[TWOVALS]], ptr [[TWOVALS_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 4
@@ -153,10 +163,14 @@ TwoInts case5(int2 TwoVals) {
 // Case 6: Initialization from a scalarized structure of different type with
 // different element types.
 // CHECK-LABEL: define hidden void @_Z5case69TwoFloats(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TF4:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF4_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TF4]], ptr [[TF4_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[TF4]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 1
 // CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 // CHECK-NEXT:    store i32 [[CONV]], ptr [[Z]], align 1
@@ -175,12 +189,26 @@ TwoInts case6(TwoFloats TF4) {
 // Case 7: Initialization of a complex structure, with bogus braces and element
 // conversions from a collection of scalar values, and structures.
 // CHECK-LABEL: define hidden void @_Z5case77TwoIntsS_i9TwoFloatsS0_S0_S0_(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_DOGGO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI1:%.*]], ptr noundef byval([[STRUCT_TWOINTS]]) align 1 [[TI2:%.*]], i32 noundef [[VAL:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF3:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF4:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_DOGGO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TI1:%.*]], ptr noundef dead_on_return [[TI2:%.*]], i32 noundef [[VAL:%.*]], ptr noundef dead_on_return [[TF1:%.*]], ptr noundef dead_on_return [[TF2:%.*]], ptr noundef dead_on_return [[TF3:%.*]], ptr noundef dead_on_return [[TF4:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TI1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TI2_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TF1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF2_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF3_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF4_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TI1]], ptr [[TI1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TI2]], ptr [[TI2_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF1]], ptr [[TF1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF2]], ptr [[TF2_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF3]], ptr [[TF3_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF4]], ptr [[TF4_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI1]], i32 0, i32 0
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS:%.*]], ptr [[TI1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI1]], i32 0, i32 1
@@ -201,7 +229,7 @@ TwoInts case6(TwoFloats TF4) {
 // CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT]], align 1
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
-// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[TF1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[X]], align 1
 // CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 1
@@ -239,10 +267,14 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
 // Case 8: Initialization of a structure from a different structure with
 // significantly different element types and grouping.
 // CHECK-LABEL: define hidden void @_Z5case85Doggo(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ANIMALBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ANIMALBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[D1:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[D1]], ptr [[D1_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
+// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO:%.*]], ptr [[D1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[LEGSTATE]], align 1
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i64 0
 // CHECK-NEXT:    store i32 [[VECEXT]], ptr [[LEGS]], align 1
@@ -325,10 +357,16 @@ AnimalBits case8(Doggo D1) {
 // structures from different layouts, different component groupings, with no
 // top-level bracing separation.
 // CHECK-LABEL: define hidden void @_Z5case95Doggo10AnimalBits(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ZOO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_DOGGO:%.*]]) align 1 [[D1:%.*]], ptr noundef byval([[STRUCT_ANIMALBITS:%.*]]) align 1 [[A1:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_ZOO:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[D1:%.*]], ptr noundef dead_on_return [[A1:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[A1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[D1]], ptr [[D1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[A1]], ptr [[A1_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[DOGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ZOO]], ptr [[AGG_RESULT]], i32 0, i32 0
-// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[DOGS]], i32 0, i32 0
+// CHECK-NEXT:    [[LEGSTATE:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO:%.*]], ptr [[DOGS]], i32 0, i32 0
 // CHECK-NEXT:    [[LEGSTATE1:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[LEGSTATE1]], align 1
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i64 0
@@ -400,7 +438,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    store <4 x float> [[VECINIT43]], ptr [[ARRAYINIT_ELEMENT]], align 1
 // CHECK-NEXT:    [[ARRAYINIT_ELEMENT44:%.*]] = getelementptr inbounds [[STRUCT_DOGGO]], ptr [[DOGS]], i32 1
 // CHECK-NEXT:    [[LEGSTATE45:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 0
-// CHECK-NEXT:    [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 0
+// CHECK-NEXT:    [[LEGS:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS:%.*]], ptr [[A1]], i32 0, i32 0
 // CHECK-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[LEGS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX46]], align 1
 // CHECK-NEXT:    [[VECINIT47:%.*]] = insertelement <4 x i32> poison, i32 [[TMP14]], i32 0
@@ -741,9 +779,15 @@ Zoo case9(Doggo D1, AnimalBits A1) {
 
 // Case 10: Initialize an object with a base class from two objects.
 // CHECK-LABEL: define hidden void @_Z6case109TwoFloatsS_(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF1:%.*]], ptr noundef byval([[STRUCT_TWOFLOATS]]) align 1 [[TF2:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TF1:%.*]], ptr noundef dead_on_return [[TF2:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF1_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TF2_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TF1]], ptr [[TF1_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[TF2]], ptr [[TF2_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X1]], align 1
 // CHECK-NEXT:    store float [[TMP0]], ptr [[X]], align 1
@@ -770,11 +814,13 @@ FourFloats case10(TwoFloats TF1, TwoFloats TF2) {
 // CHECK-LABEL: define hidden void @_Z6case11f(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[REF_TMP1:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[REF_TMP4:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[REF_TMP7:%.*]] = alloca <4 x float>, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS:%.*]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
@@ -819,8 +865,10 @@ FourFloats case11(float F) {
 // CHECK-LABEL: define hidden void @_Z6case12ii(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[J_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[J]], ptr [[J_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
@@ -839,9 +887,13 @@ SlicyBits case12(int I, int J) {
 
 // Case 13: Initialize bitfield from a struct of two ints.
 // CHECK-LABEL: define hidden void @_Z6case137TwoInts(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_SLICYBITS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[TI:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[TI_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[TI]], ptr [[TI_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS:%.*]], ptr [[TI]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
 // CHECK-NEXT:    store i8 [[TMP1]], ptr [[AGG_RESULT]], align 1
@@ -859,14 +911,18 @@ SlicyBits case13(TwoInts TI) {
 
 // Case 14: Initialize struct of ints from struct with bitfields.
 // CHECK-LABEL: define hidden void @_Z6case149SlicyBits(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOINTS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[SB:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[SB_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[SB]], ptr [[SB_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
 // CHECK-NEXT:    store i32 [[BF_CAST]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 1
-// CHECK-NEXT:    [[W1:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
+// CHECK-NEXT:    [[W1:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS:%.*]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD2:%.*]] = load i8, ptr [[W1]], align 1
 // CHECK-NEXT:    [[BF_CAST3:%.*]] = sext i8 [[BF_LOAD2]] to i32
 // CHECK-NEXT:    store i32 [[BF_CAST3]], ptr [[W]], align 1
@@ -879,15 +935,19 @@ TwoInts case14(SlicyBits SB) {
 
 // Case 15: Initialize struct of floats from struct with bitfields.
 // CHECK-LABEL: define hidden void @_Z6case159SlicyBits(
-// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_SLICYBITS:%.*]]) align 1 [[SB:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noundef dead_on_return [[SB:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[SB_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
+// CHECK-NEXT:    store ptr [[SB]], ptr [[SB_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
 // CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
-// CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
+// CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS:%.*]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
 // CHECK-NEXT:    [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
 // CHECK-NEXT:    [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
@@ -904,7 +964,9 @@ TwoFloats case15(SlicyBits SB) {
 // CHECK-LABEL: define hidden void @_Z7makeTwoRf(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_TWOFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]], ptr noalias noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[X_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store ptr [[X]], ptr [[X_ADDR]], align 4
 // CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]]
@@ -930,9 +992,11 @@ TwoFloats makeTwo(inout float X) {
 // CHECK-LABEL: define hidden void @_Z6case16v(
 // CHECK-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOURFLOATS:%.*]]) align 1 [[AGG_RESULT:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[X:%.*]] = alloca float, align 4
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_TWOFLOATS:%.*]], align 1
 // CHECK-NEXT:    [[TMP:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
 // CHECK-NEXT:    store float 0.000000e+00, ptr [[X]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 4
 // CHECK-NEXT:    store float [[TMP0]], ptr [[TMP]], align 4
@@ -1002,11 +1066,13 @@ void case18() {
 
 // InitList with Struct with unnamed bitfield on RHS
 // CHECK-LABEL: define hidden void @_Z6case197Unnamed(
-// CHECK-SAME: ptr noundef byval([[STRUCT_UNNAMED:%.*]]) align 1 [[U:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[U:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[U_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[U]], ptr [[U_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
-// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_UNNAMED]], ptr [[U]], i32 0, i32 0
+// CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_UNNAMED:%.*]], ptr [[U]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 1
 // CHECK-NEXT:    store i32 [[TMP0]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 1
@@ -1031,9 +1097,11 @@ void case20() {
 
 // InitList with Empty Struct on RHS
 // CHECK-LABEL: define hidden void @_Z6case215Empty(
-// CHECK-SAME: ptr noundef byval([[STRUCT_EMPTY:%.*]]) align 1 [[E:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[E:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[E_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[E]], ptr [[E_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI]], ptr align 1 @__const._Z6case215Empty.TI, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -1055,9 +1123,11 @@ void case22() {
 
 // InitList with Struct with only unnamed bitfield on RHS
 // CHECK-LABEL: define hidden void @_Z6case2311UnnamedOnly(
-// CHECK-SAME: ptr noundef byval([[STRUCT_UNNAMEDONLY:%.*]]) align 1 [[UO:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[UO:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[UO_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
+// CHECK-NEXT:    store ptr [[UO]], ptr [[UO_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI]], ptr align 1 @__const._Z6case2311UnnamedOnly.TI, i32 8, i1 false)
 // CHECK-NEXT:    ret void
 //
@@ -1082,10 +1152,14 @@ void case24() {
 }
 
 // CHECK-LABEL: define hidden void @_Z6case2512EmptyDerived14UnnamedDerived(
-// CHECK-SAME: ptr noundef byval([[STRUCT_EMPTYDERIVED:%.*]]) align 1 [[ED:%.*]], ptr noundef byval([[STRUCT_UNNAMEDDERIVED:%.*]]) align 1 [[UD:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[ED:%.*]], ptr noundef dead_on_return [[UD:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ED_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[UD_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI1:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
 // CHECK-NEXT:    [[TI2:%.*]] = alloca [[STRUCT_TWOINTS]], align 1
+// CHECK-NEXT:    store ptr [[ED]], ptr [[ED_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[UD]], ptr [[UD_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI1]], ptr align 1 @__const._Z6case2512EmptyDerived14UnnamedDerived.TI1, i32 8, i1 false)
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TI2]], ptr align 1 @__const._Z6case2512EmptyDerived14UnnamedDerived.TI2, i32 8, i1 false)
 // CHECK-NEXT:    ret void
@@ -1096,11 +1170,13 @@ void case25(EmptyDerived ED, UnnamedDerived UD) {
 }
 
 // CHECK-LABEL: define hidden void @_Z6case267TwoInts(
-// CHECK-SAME: ptr noundef byval([[STRUCT_TWOINTS:%.*]]) align 1 [[TI:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[TI:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TI_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[F:%.*]] = alloca <4 x float>, align 4
 // CHECK-NEXT:    [[F2:%.*]] = alloca <3 x float>, align 4
-// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
+// CHECK-NEXT:    store ptr [[TI]], ptr [[TI_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS:%.*]], ptr [[TI]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z]], align 1
 // CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x float> poison, float [[CONV]], i32 0
@@ -1134,9 +1210,11 @@ struct CustomResource {
 };
 
 // CHECK-LABEL: define hidden void @_Z6case2714CustomResource(
-// CHECK-SAME: ptr noundef byval([[STRUCT_CUSTOMRESOURCE:%.*]]) align 1 [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_CUSTOMRESOURCE]], align 1
+// CHECK-NEXT:    [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_CUSTOMRESOURCE:%.*]], align 1
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[H:%.*]] = getelementptr inbounds nuw [[STRUCT_CUSTOMRESOURCE]], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT:    [[H1:%.*]] = getelementptr inbounds nuw [[STRUCT_CUSTOMRESOURCE]], ptr [[A]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load target("dx.TypedBuffer", float, 1, 0, 0), ptr [[H1]], align 1
@@ -1150,13 +1228,15 @@ void case27(CustomResource a) {
 // Check cases with explicit casts
 
 // CHECK-LABEL: define hidden void @_Z6case289TwoFloats(
-// CHECK-SAME: ptr noundef byval([[STRUCT_TWOFLOATS:%.*]]) align 1 [[TF:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[TF:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TF_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[TI:%.*]] = alloca [[STRUCT_TWOINTS:%.*]], align 1
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_TWOINTS]], align 1
-// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_TWOFLOATS]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_TWOFLOATS:%.*]], align 1
 // CHECK-NEXT:    [[REF_TMP6:%.*]] = alloca [[STRUCT_TWOINTS]], align 1
 // CHECK-NEXT:    [[AGG_TEMP7:%.*]] = alloca [[STRUCT_TWOFLOATS]], align 1
+// CHECK-NEXT:    store ptr [[TF]], ptr [[TF_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[TI]], i32 0, i32 0
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[TF]], i32 8, i1 false)
 // CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TWOINTS]], ptr [[REF_TMP]], i32 0, i32 0
@@ -1194,13 +1274,15 @@ void case28(TwoFloats TF) {
 }
 
 // CHECK-LABEL: define hidden void @_Z6case2910FourFloats(
-// CHECK-SAME: ptr noundef byval([[STRUCT_FOURFLOATS:%.*]]) align 1 [[FF:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[FF:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[FF_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
 // CHECK-NEXT:    [[INTS:%.*]] = alloca [2 x i32], align 4
 // CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [2 x i32], align 4
-// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_FOURFLOATS]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_FOURFLOATS:%.*]], align 1
 // CHECK-NEXT:    [[REF_TMP7:%.*]] = alloca [2 x i32], align 4
 // CHECK-NEXT:    [[AGG_TEMP8:%.*]] = alloca [[STRUCT_FOURFLOATS]], align 1
+// CHECK-NEXT:    store ptr [[FF]], ptr [[FF_INDIRECT_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[FF]], i32 16, i1 false)
 // CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[REF_TMP]], i32 0, i32 0
 // CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x i32], ptr [[REF_TMP]], i32 0, i32 1
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
index dd9dd706aae26..85bf7c5cd72c0 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -165,17 +165,17 @@ struct Derived : BFields {
 };
 
 // CHECK-LABEL: define hidden void @_Z5call47Derived(
-// CHECK-SAME: ptr noundef byval([[STRUCT_DERIVED:%.*]]) align 1 [[D:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ptr noundef dead_on_return [[D:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[A:%.*]] = alloca [2 x <2 x i32>], align 4
-// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK:         [[A:%.*]] = alloca [2 x <2 x i32>], align 4
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca %struct.Derived, align 1
 // CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <4 x i32>, align 4
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[D]], i32 19, i1 false)
-// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AGG_TEMP]], i32 0, i32 0
 // CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS:%.*]], ptr [[GEP]], i32 0, i32 1
-// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
-// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 2
-// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AGG_TEMP]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AGG_TEMP]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[FLATCAST_TMP]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[GEP1]], align 8
 // CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[TMP1]] to i32
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index 2e01ddddc510c..f66d39f5eec11 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -161,7 +161,14 @@ void init(out S s) {
 // CHECK: [[S:%.*]] = alloca %struct.S
 // CHECK: [[Tmp:%.*]] = alloca %struct.S
 // CHECK: call void {{.*}}init{{.*}}(ptr noalias noundef nonnull align 1 dereferenceable(8) [[Tmp]])
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[Tmp]], i32 8, i1 false)
+// CHECK: [[X:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 0
+// CHECK: [[X1:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[Tmp]], i32 0, i32 0
+// CHECK: [[X2:%.*]] = load i32, ptr [[X1]], align 1
+// CHECK: store i32 [[X2]], ptr [[X]], align 1
+// CHECK: [[Y:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 1
+// CHECK: [[Y2:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[Tmp]], i32 0, i32 1
+// CHECK: [[Y3:%.*]] = load float, ptr [[Y2]], align 1
+// CHECK: store float [[Y3]], ptr [[Y]], align 1
 
 // OPT: ret i32 7
 export int case6() {
@@ -186,9 +193,24 @@ void init(inout R s) {
 
 // CHECK: [[S:%.*]] = alloca %struct.R
 // CHECK: [[Tmp:%.*]] = alloca %struct.R
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[S]], i32 8, i1 false)
+// CHECK: [[X:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[Tmp]], i32 0, i32 0
+// CHECK: [[X1:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[S]], i32 0, i32 0
+// CHECK: [[X2:%.*]] = load i32, ptr [[X1]], align 1
+// CHECK: store i32 [[X2]], ptr [[X]], align 1
+// CHECK: [[Y:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[Tmp]], i32 0, i32 1
+// CHECK: [[Y2:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[S]], i32 0, i32 1
+// CHECK: [[Y3:%.*]] = load float, ptr [[Y2]], align 1
+// CHECK: store float [[Y3]], ptr [[Y]], align 1
+
 // CHECK: call void {{.*}}init{{.*}}(ptr noalias noundef nonnull align 1 dereferenceable(8) [[Tmp]])
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[Tmp]], i32 8, i1 false)
+// CHECK: [[X:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[S]], i32 0, i32 0
+// CHECK: [[X2:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[Tmp]], i32 0, i32 0
+// CHECK: [[X3:%.*]] = load i32, ptr [[X2]], align 1
+// CHECK: store i32 [[X3]], ptr [[X]], align 1
+// CHECK: [[Y:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[S]], i32 0, i32 1
+// CHECK: [[Y2:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[Tmp]], i32 0, i32 1
+// CHECK: [[Y3:%.*]] = load float, ptr [[Y2]], align 1
+// CHECK: store float [[Y3]], ptr [[Y]], align 1
 
 // OPT: ret i32 7
 export int case7() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index ab5873bfa8296..bd8fe35f62172 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // RUN: %clang_cc1 -finclude-default-header -fnative-half-type -fnative-int16-type -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
 
 struct S {
@@ -5,58 +6,108 @@ struct S {
   float Y;
 };
 
-// struct truncation to a scalar
-// CHECK-LABEL: define void {{.*}}call0
-// CHECK: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[s]], ptr align 1 {{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[s]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1
-// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
-// CHECK-NEXT: store i32 [[L]], ptr [[A]], align 4
-export void call0() {
+// CHECK-LABEL: define hidden void @_Z5call0v(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z5call0v.s, i32 8, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[S]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A]], align 4
+// CHECK-NEXT:    ret void
+//
+void call0() {
   S s = {1,2};
   int A = (int)s;
 }
 
 // struct from vector
-// CHECK-LABEL: define void {{.*}}call1
-// CHECK: [[A:%.*]] = alloca <2 x i32>, align 4
-// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: store <2 x i32> <i32 1, i32 2>, ptr [[A]], align 4
-// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[A]], align 4
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0
-// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
-export void call1() {
+// CHECK-LABEL: define hidden void @_Z5call1v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[REF_TMP4:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[A]], align 4
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 4
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[VEC_LOAD:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
+// CHECK-NEXT:    store i32 [[VEC_LOAD]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[VEC_LOAD2:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[VEC_LOAD2]] to float
+// CHECK-NEXT:    store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X3]], align 1
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[X]], align 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[A]], align 4
+// CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP4]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP4]], i32 0, i32 1
+// CHECK-NEXT:    [[VEC_LOAD7:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0
+// CHECK-NEXT:    store i32 [[VEC_LOAD7]], ptr [[GEP5]], align 4
+// CHECK-NEXT:    [[VEC_LOAD8:%.*]] = extractelement <2 x i32> [[TMP2]], i64 1
+// CHECK-NEXT:    [[CONV9:%.*]] = sitofp i32 [[VEC_LOAD8]] to float
+// CHECK-NEXT:    store float [[CONV9]], ptr [[GEP6]], align 4
+// CHECK-NEXT:    [[Y10:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP4]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[Y10]], align 1
+// CHECK-NEXT:    store float [[TMP3]], ptr [[Y]], align 1
+// CHECK-NEXT:    ret void
+//
+void call1() {
   int2 A = {1,2};
   S s = (S)A;
 }
 
 
 // struct from array
-// CHECK-LABEL: define void {{.*}}call2
-// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
-// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1
-// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
-// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
-// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
-export void call2() {
+// CHECK-LABEL: define hidden void @_Z5call2v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[REF_TMP5:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[AGG_TEMP6:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @__const._Z5call2v.A, i32 8, i1 false)
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_TEMP]], ptr align 4 [[A]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP2]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK-NEXT:    store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[X4:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X4]], align 1
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[X]], align 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_TEMP6]], ptr align 4 [[A]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP5]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP5]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP9:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP6]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP6]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP9]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP7]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP10]], align 4
+// CHECK-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP4]] to float
+// CHECK-NEXT:    store float [[CONV11]], ptr [[GEP8]], align 4
+// CHECK-NEXT:    [[Y12:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP5]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[Y12]], align 1
+// CHECK-NEXT:    store float [[TMP5]], ptr [[Y]], align 1
+// CHECK-NEXT:    ret void
+//
+void call2() {
   int A[2] = {1,2};
   S s = (S)A;
 }
@@ -71,42 +122,93 @@ struct R {
 };
 
 // struct from nested struct?
-// CHECK-LABEL: define void {{.*}}call6
-// CHECK: [[r:%.*]] = alloca %struct.R, align 1
-// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.R, align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[r]], ptr align 1 {{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[r]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 0, i32 1
-// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
-// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
-// CHECK-NEXT: [[L4:%.*]] = load float, ptr [[G4]], align 4
-// CHECK-NEXT: store float [[L4]], ptr [[G2]], align 4
-export void call6() {
+// CHECK-LABEL: define hidden void @_Z5call6v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[R:%.*]] = alloca [[STRUCT_R:%.*]], align 1
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_R]], align 1
+// CHECK-NEXT:    [[REF_TMP5:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[AGG_TEMP6:%.*]] = alloca [[STRUCT_R]], align 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[R]], ptr align 1 @__const._Z5call6v.r, i32 8, i1 false)
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[R]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP2]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[GEP3]], align 4
+// CHECK-NEXT:    store float [[TMP1]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[X4:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X4]], align 1
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[X]], align 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP6]], ptr align 1 [[R]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP5]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP5]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP9:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[AGG_TEMP6]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[AGG_TEMP6]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP9]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP7]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[GEP10]], align 4
+// CHECK-NEXT:    store float [[TMP4]], ptr [[GEP8]], align 4
+// CHECK-NEXT:    [[Y11:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP5]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[Y11]], align 1
+// CHECK-NEXT:    store float [[TMP5]], ptr [[Y]], align 1
+// CHECK-NEXT:    ret void
+//
+void call6() {
   R r = {{1}, 2.0};
   S s = (S)r;
 }
 
 // nested struct from array?
-// CHECK-LABEL: define void {{.*}}call7
-// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
-// CHECK-NEXT: [[r:%.*]] = alloca %struct.R, align 1
-// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 0, i32 1
-// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1
-// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
-// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
-// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
-export void call7() {
+// CHECK-LABEL: define hidden void @_Z5call7v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[R:%.*]] = alloca [[STRUCT_R:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_R]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[REF_TMP6:%.*]] = alloca [[STRUCT_R]], align 1
+// CHECK-NEXT:    [[AGG_TEMP7:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @__const._Z5call7v.A, i32 8, i1 false)
+// CHECK-NEXT:    [[Q:%.*]] = getelementptr inbounds nuw [[STRUCT_R]], ptr [[R]], i32 0, i32 0
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_Q:%.*]], ptr [[Q]], i32 0, i32 0
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_TEMP]], ptr align 4 [[A]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[REF_TMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP2]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK-NEXT:    store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[Q4:%.*]] = getelementptr inbounds nuw [[STRUCT_R]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[Z5:%.*]] = getelementptr inbounds nuw [[STRUCT_Q]], ptr [[Q4]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[Z5]], align 1
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[Z]], align 1
+// CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_R]], ptr [[R]], i32 0, i32 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_TEMP7]], ptr align 4 [[A]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[REF_TMP6]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP9:%.*]] = getelementptr inbounds [[STRUCT_R]], ptr [[REF_TMP6]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP7]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds [2 x i32], ptr [[AGG_TEMP7]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP10]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP8]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP11]], align 4
+// CHECK-NEXT:    [[CONV12:%.*]] = sitofp i32 [[TMP4]] to float
+// CHECK-NEXT:    store float [[CONV12]], ptr [[GEP9]], align 4
+// CHECK-NEXT:    [[F13:%.*]] = getelementptr inbounds nuw [[STRUCT_R]], ptr [[REF_TMP6]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F13]], align 1
+// CHECK-NEXT:    store float [[TMP5]], ptr [[F]], align 1
+// CHECK-NEXT:    ret void
+//
+void call7() {
   int A[2] = {1,2};
   R r = (R)A;
 }
@@ -118,23 +220,49 @@ struct T {
 };
 
 // struct truncation
-// CHECK-LABEL: define void {{.*}}call8
-// CHECK: [[t:%.*]] = alloca %struct.T, align 1
-// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.T, align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[t]], ptr align 1 {{.*}}, i32 12, i1 false)
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[t]], i32 12, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.T, ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 1
-// CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 2
-// CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[G3]], align 4
-// CHECK-NEXT: store i32 [[L1]], ptr [[G1]], align 4
-// CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L2]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
-export void call8() {
+// CHECK-LABEL: define hidden void @_Z5call8v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[T:%.*]] = alloca [[STRUCT_T:%.*]], align 1
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_T]], align 1
+// CHECK-NEXT:    [[REF_TMP6:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[AGG_TEMP7:%.*]] = alloca [[STRUCT_T]], align 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[T]], ptr align 1 @__const._Z5call8v.t, i32 12, i1 false)
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[T]], i32 12, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[AGG_TEMP]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP2]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK-NEXT:    store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X5]], align 1
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[X]], align 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP7]], ptr align 1 [[T]], i32 12, i1 false)
+// CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP6]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP6]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[AGG_TEMP7]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[AGG_TEMP7]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP12:%.*]] = getelementptr inbounds [[STRUCT_T]], ptr [[AGG_TEMP7]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP10]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP8]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP11]], align 4
+// CHECK-NEXT:    [[CONV13:%.*]] = sitofp i32 [[TMP4]] to float
+// CHECK-NEXT:    store float [[CONV13]], ptr [[GEP9]], align 4
+// CHECK-NEXT:    [[Y14:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP6]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[Y14]], align 1
+// CHECK-NEXT:    store float [[TMP5]], ptr [[Y]], align 1
+// CHECK-NEXT:    ret void
+//
+void call8() {
   T t = {1,2,3};
   S s = (S)t;
 }
@@ -151,83 +279,260 @@ struct Derived : BFields {
 };
 
 // Derived Struct truncate to scalar
-// CHECK-LABEL: call9
-// CHECK: [[D2:%.*]] = alloca double, align 8
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
-// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
-// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0
-// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2
-// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1
-// CHECK-NEXT: [[A:%.*]] = load double, ptr [[Gep1]], align 8
-// CHECK-NEXT: store double [[A]], ptr [[D2]], align 8
-// CHECK-NEXT: ret void
-export void call9(Derived D) {
+// CHECK-LABEL: define hidden void @_Z5call97Derived(
+// CHECK-SAME: ptr noundef dead_on_return [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D2:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED:%.*]], align 1
+// CHECK-NEXT:    store ptr [[D]], ptr [[D_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS:%.*]], ptr [[GEP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[GEP1]], align 8
+// CHECK-NEXT:    store double [[TMP0]], ptr [[D2]], align 8
+// CHECK-NEXT:    ret void
+//
+void call9(Derived D) {
   double D2 = (double)D;
 }
 
 // Derived struct from vector
-// CHECK-LABEL: call10
-// CHECK: [[IAddr:%.*]] = alloca <4 x i32>, align 4
-// CHECK-NEXT: [[D:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: store <4 x i32> %I, ptr [[IAddr]], align 4
-// CHECK-NEXT: [[A:%.*]] = load <4 x i32>, ptr [[IAddr]], align 4
-// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0
-// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
-// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 0
-// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 0, i32 2
-// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[D]], i32 0, i32 1
-// CHECK-NEXT: [[VL:%.*]] = extractelement <4 x i32> [[A]], i64 0
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to double
-// CHECK-NEXT: store double [[C]], ptr [[Gep1]], align 8
-// CHECK-NEXT: [[VL4:%.*]] = extractelement <4 x i32> [[A]], i64 1
-// CHECK-NEXT: [[B:%.*]] = trunc i32 [[VL4]] to i24
-// CHECK-NEXT: [[BFL:%.*]] = load i24, ptr [[E]], align 1
-// CHECK-NEXT: [[BFV:%.*]] = and i24 [[B]], 32767
-// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL]], -32768
-// CHECK-NEXT: [[BFSet:%.*]] = or i24 [[BFC]], [[BFV]]
-// CHECK-NEXT: store i24 [[BFSet]], ptr [[E]], align 1
-// CHECK-NEXT: [[VL5:%.*]] = extractelement <4 x i32> [[A]], i64 2
-// CHECK-NEXT: [[C6:%.*]] = sitofp i32 [[VL5]] to float
-// CHECK-NEXT: store float [[C6]], ptr [[Gep2]], align 4
-// CHECK-NEXT: [[VL7:%.*]] = extractelement <4 x i32> [[A]], i64 3
-// CHECK-NEXT: store i32 [[VL7]], ptr [[Gep3]], align 4
-// CHECK-NEXT: ret void
-export void call10(int4 I) {
+// CHECK-LABEL: define hidden void @_Z6call10Dv4_i(
+// CHECK-SAME: <4 x i32> noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca <4 x i32>, align 4
+// CHECK-NEXT:    [[D:%.*]] = alloca [[STRUCT_DERIVED:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[REF_TMP11:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[REF_TMP33:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[REF_TMP50:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    store <4 x i32> [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS:%.*]], ptr [[D]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[VEC_LOAD:%.*]] = extractelement <4 x i32> [[TMP0]], i64 0
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[VEC_LOAD]] to double
+// CHECK-NEXT:    store double [[CONV]], ptr [[GEP2]], align 8
+// CHECK-NEXT:    [[VEC_LOAD5:%.*]] = extractelement <4 x i32> [[TMP0]], i64 1
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[VEC_LOAD5]] to i24
+// CHECK-NEXT:    [[BF_LOAD:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT:    [[BF_VALUE:%.*]] = and i24 [[TMP1]], 32767
+// CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i24 [[BF_LOAD]], -32768
+// CHECK-NEXT:    [[BF_SET:%.*]] = or i24 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK-NEXT:    store i24 [[BF_SET]], ptr [[E]], align 1
+// CHECK-NEXT:    [[VEC_LOAD6:%.*]] = extractelement <4 x i32> [[TMP0]], i64 2
+// CHECK-NEXT:    [[CONV7:%.*]] = sitofp i32 [[VEC_LOAD6]] to float
+// CHECK-NEXT:    store float [[CONV7]], ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[VEC_LOAD8:%.*]] = extractelement <4 x i32> [[TMP0]], i64 3
+// CHECK-NEXT:    store i32 [[VEC_LOAD8]], ptr [[GEP4]], align 4
+// CHECK-NEXT:    [[D9:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[D9]], align 1
+// CHECK-NEXT:    store double [[TMP2]], ptr [[D1]], align 1
+// CHECK-NEXT:    [[E10:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[D]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[GEP12:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP11]], i32 0, i32 0
+// CHECK-NEXT:    [[E13:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP12]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP14:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP11]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP15:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP11]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP16:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP11]], i32 0, i32 1
+// CHECK-NEXT:    [[VEC_LOAD17:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+// CHECK-NEXT:    [[CONV18:%.*]] = sitofp i32 [[VEC_LOAD17]] to double
+// CHECK-NEXT:    store double [[CONV18]], ptr [[GEP14]], align 8
+// CHECK-NEXT:    [[VEC_LOAD19:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1
+// CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[VEC_LOAD19]] to i24
+// CHECK-NEXT:    [[BF_LOAD20:%.*]] = load i24, ptr [[E13]], align 1
+// CHECK-NEXT:    [[BF_VALUE21:%.*]] = and i24 [[TMP4]], 32767
+// CHECK-NEXT:    [[BF_CLEAR22:%.*]] = and i24 [[BF_LOAD20]], -32768
+// CHECK-NEXT:    [[BF_SET23:%.*]] = or i24 [[BF_CLEAR22]], [[BF_VALUE21]]
+// CHECK-NEXT:    store i24 [[BF_SET23]], ptr [[E13]], align 1
+// CHECK-NEXT:    [[VEC_LOAD24:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2
+// CHECK-NEXT:    [[CONV25:%.*]] = sitofp i32 [[VEC_LOAD24]] to float
+// CHECK-NEXT:    store float [[CONV25]], ptr [[GEP15]], align 4
+// CHECK-NEXT:    [[VEC_LOAD26:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3
+// CHECK-NEXT:    store i32 [[VEC_LOAD26]], ptr [[GEP16]], align 4
+// CHECK-NEXT:    [[E27:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP11]], i32 0, i32 1
+// CHECK-NEXT:    [[BF_LOAD28:%.*]] = load i24, ptr [[E27]], align 1
+// CHECK-NEXT:    [[BF_SHL:%.*]] = shl i24 [[BF_LOAD28]], 9
+// CHECK-NEXT:    [[BF_ASHR:%.*]] = ashr i24 [[BF_SHL]], 9
+// CHECK-NEXT:    [[BF_CAST:%.*]] = sext i24 [[BF_ASHR]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[BF_CAST]] to i24
+// CHECK-NEXT:    [[BF_LOAD29:%.*]] = load i24, ptr [[E10]], align 1
+// CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i24 [[TMP5]], 32767
+// CHECK-NEXT:    [[BF_CLEAR31:%.*]] = and i24 [[BF_LOAD29]], -32768
+// CHECK-NEXT:    [[BF_SET32:%.*]] = or i24 [[BF_CLEAR31]], [[BF_VALUE30]]
+// CHECK-NEXT:    store i24 [[BF_SET32]], ptr [[E10]], align 1
+// CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[D]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[GEP34:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP33]], i32 0, i32 0
+// CHECK-NEXT:    [[E35:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP34]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP36:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP33]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP37:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP33]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP38:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP33]], i32 0, i32 1
+// CHECK-NEXT:    [[VEC_LOAD39:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0
+// CHECK-NEXT:    [[CONV40:%.*]] = sitofp i32 [[VEC_LOAD39]] to double
+// CHECK-NEXT:    store double [[CONV40]], ptr [[GEP36]], align 8
+// CHECK-NEXT:    [[VEC_LOAD41:%.*]] = extractelement <4 x i32> [[TMP6]], i64 1
+// CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[VEC_LOAD41]] to i24
+// CHECK-NEXT:    [[BF_LOAD42:%.*]] = load i24, ptr [[E35]], align 1
+// CHECK-NEXT:    [[BF_VALUE43:%.*]] = and i24 [[TMP7]], 32767
+// CHECK-NEXT:    [[BF_CLEAR44:%.*]] = and i24 [[BF_LOAD42]], -32768
+// CHECK-NEXT:    [[BF_SET45:%.*]] = or i24 [[BF_CLEAR44]], [[BF_VALUE43]]
+// CHECK-NEXT:    store i24 [[BF_SET45]], ptr [[E35]], align 1
+// CHECK-NEXT:    [[VEC_LOAD46:%.*]] = extractelement <4 x i32> [[TMP6]], i64 2
+// CHECK-NEXT:    [[CONV47:%.*]] = sitofp i32 [[VEC_LOAD46]] to float
+// CHECK-NEXT:    store float [[CONV47]], ptr [[GEP37]], align 4
+// CHECK-NEXT:    [[VEC_LOAD48:%.*]] = extractelement <4 x i32> [[TMP6]], i64 3
+// CHECK-NEXT:    store i32 [[VEC_LOAD48]], ptr [[GEP38]], align 4
+// CHECK-NEXT:    [[F49:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP33]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F49]], align 1
+// CHECK-NEXT:    store float [[TMP8]], ptr [[F]], align 1
+// CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds nuw [[STRUCT_DERIVED]], ptr [[D]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i32>, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[GEP51:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP50]], i32 0, i32 0
+// CHECK-NEXT:    [[E52:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP51]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP53:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP50]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP54:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP50]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP55:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[REF_TMP50]], i32 0, i32 1
+// CHECK-NEXT:    [[VEC_LOAD56:%.*]] = extractelement <4 x i32> [[TMP9]], i64 0
+// CHECK-NEXT:    [[CONV57:%.*]] = sitofp i32 [[VEC_LOAD56]] to double
+// CHECK-NEXT:    store double [[CONV57]], ptr [[GEP53]], align 8
+// CHECK-NEXT:    [[VEC_LOAD58:%.*]] = extractelement <4 x i32> [[TMP9]], i64 1
+// CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[VEC_LOAD58]] to i24
+// CHECK-NEXT:    [[BF_LOAD59:%.*]] = load i24, ptr [[E52]], align 1
+// CHECK-NEXT:    [[BF_VALUE60:%.*]] = and i24 [[TMP10]], 32767
+// CHECK-NEXT:    [[BF_CLEAR61:%.*]] = and i24 [[BF_LOAD59]], -32768
+// CHECK-NEXT:    [[BF_SET62:%.*]] = or i24 [[BF_CLEAR61]], [[BF_VALUE60]]
+// CHECK-NEXT:    store i24 [[BF_SET62]], ptr [[E52]], align 1
+// CHECK-NEXT:    [[VEC_LOAD63:%.*]] = extractelement <4 x i32> [[TMP9]], i64 2
+// CHECK-NEXT:    [[CONV64:%.*]] = sitofp i32 [[VEC_LOAD63]] to float
+// CHECK-NEXT:    store float [[CONV64]], ptr [[GEP54]], align 4
+// CHECK-NEXT:    [[VEC_LOAD65:%.*]] = extractelement <4 x i32> [[TMP9]], i64 3
+// CHECK-NEXT:    store i32 [[VEC_LOAD65]], ptr [[GEP55]], align 4
+// CHECK-NEXT:    [[G66:%.*]] = getelementptr inbounds nuw [[STRUCT_DERIVED]], ptr [[REF_TMP50]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[G66]], align 1
+// CHECK-NEXT:    store i32 [[TMP11]], ptr [[G]], align 1
+// CHECK-NEXT:    ret void
+//
+void call10(int4 I) {
   Derived D = (Derived)I;
 }
 
 // truncate derived struct
-// CHECK-LABEL: call11
-// CHECK: [[B:%.*]] = alloca %struct.BFields, align 1
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[D]], i32 19, i1 false)
-// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0
-// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
-// CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0, i32 0
-// CHECK-NEXT: [[Gep2:%.*]] = getelementptr inbounds %struct.BFields, ptr [[B]], i32 0, i32 2
-// CHECK-NEXT: [[Gep3:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep3]], i32 0, i32 1
-// CHECK-NEXT: [[Gep5:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0
-// CHECK-NEXT: [[Gep6:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 2
-// CHECK-NEXT: [[Gep7:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 1
-// CHECK-NEXT: [[A:%.*]] = load double, ptr [[Gep5]], align 8
-// CHECK-NEXT: store double [[A]], ptr [[Gep1]], align 8
-// CHECK-NEXT: [[BFl:%.*]] = load i24, ptr [[E4]], align 1
-// CHECK-NEXT: [[Shl:%.*]] = shl i24 [[BFL]], 9
-// CHECK-NEXT: [[Ashr:%.*]] = ashr i24 [[Shl]], 9
-// CHECK-NEXT: [[BFC:%.*]] = sext i24 [[Ashr]] to i32
-// CHECK-NEXT: [[B:%.*]] = trunc i32 [[BFC]] to i24
-// CHECK-NEXT: [[BFL8:%.*]] = load i24, ptr [[E]], align 1
-// CHECK-NEXT: [[BFV:%.*]] = and i24 [[B]], 32767
-// CHECK-NEXT: [[BFC:%.*]] = and i24 [[BFL8]], -32768
-// CHECK-NEXT: [[BFSet:%.*]] = or i24 [[BFC]], [[BFV]]
-// CHECK-NEXT: store i24 [[BFSet]], ptr [[E]], align 1
-// CHECK-NEXT: [[C:%.*]] = load float, ptr [[Gep6]], align 4
-// CHECK-NEXT: store float [[C]], ptr [[Gep2]], align 4
-// CHECK-NEXT: ret void
-export void call11(Derived D) {
+// CHECK-LABEL: define hidden void @_Z6call117Derived(
+// CHECK-SAME: ptr noundef dead_on_return [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[D_INDIRECT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_BFIELDS:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_BFIELDS]], align 1
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP12:%.*]] = alloca [[STRUCT_BFIELDS]], align 1
+// CHECK-NEXT:    [[AGG_TEMP13:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[REF_TMP40:%.*]] = alloca [[STRUCT_BFIELDS]], align 1
+// CHECK-NEXT:    [[AGG_TEMP41:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    store ptr [[D]], ptr [[D_INDIRECT_ADDR]], align 4
+// CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP]], i32 0
+// CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[E5:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP4]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[GEP6]], align 8
+// CHECK-NEXT:    store double [[TMP0]], ptr [[GEP2]], align 8
+// CHECK-NEXT:    [[BF_LOAD:%.*]] = load i24, ptr [[E5]], align 1
+// CHECK-NEXT:    [[BF_SHL:%.*]] = shl i24 [[BF_LOAD]], 9
+// CHECK-NEXT:    [[BF_ASHR:%.*]] = ashr i24 [[BF_SHL]], 9
+// CHECK-NEXT:    [[BF_CAST:%.*]] = sext i24 [[BF_ASHR]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[BF_CAST]] to i24
+// CHECK-NEXT:    [[BF_LOAD9:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT:    [[BF_VALUE:%.*]] = and i24 [[TMP1]], 32767
+// CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i24 [[BF_LOAD9]], -32768
+// CHECK-NEXT:    [[BF_SET:%.*]] = or i24 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK-NEXT:    store i24 [[BF_SET]], ptr [[E]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[GEP7]], align 4
+// CHECK-NEXT:    store float [[TMP2]], ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[D10:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr [[D10]], align 1
+// CHECK-NEXT:    store double [[TMP3]], ptr [[D1]], align 1
+// CHECK-NEXT:    [[E11:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[B]], i32 0, i32 1
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP13]], ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT:    [[GEP14:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP12]], i32 0
+// CHECK-NEXT:    [[E15:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP14]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP16:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP12]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP17:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP12]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP18:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP13]], i32 0, i32 0
+// CHECK-NEXT:    [[E19:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP18]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP20:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP13]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP21:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP13]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP13]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[GEP20]], align 8
+// CHECK-NEXT:    store double [[TMP4]], ptr [[GEP16]], align 8
+// CHECK-NEXT:    [[BF_LOAD23:%.*]] = load i24, ptr [[E19]], align 1
+// CHECK-NEXT:    [[BF_SHL24:%.*]] = shl i24 [[BF_LOAD23]], 9
+// CHECK-NEXT:    [[BF_ASHR25:%.*]] = ashr i24 [[BF_SHL24]], 9
+// CHECK-NEXT:    [[BF_CAST26:%.*]] = sext i24 [[BF_ASHR25]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[BF_CAST26]] to i24
+// CHECK-NEXT:    [[BF_LOAD27:%.*]] = load i24, ptr [[E15]], align 1
+// CHECK-NEXT:    [[BF_VALUE28:%.*]] = and i24 [[TMP5]], 32767
+// CHECK-NEXT:    [[BF_CLEAR29:%.*]] = and i24 [[BF_LOAD27]], -32768
+// CHECK-NEXT:    [[BF_SET30:%.*]] = or i24 [[BF_CLEAR29]], [[BF_VALUE28]]
+// CHECK-NEXT:    store i24 [[BF_SET30]], ptr [[E15]], align 1
+// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[GEP21]], align 4
+// CHECK-NEXT:    store float [[TMP6]], ptr [[GEP17]], align 4
+// CHECK-NEXT:    [[E31:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP12]], i32 0, i32 1
+// CHECK-NEXT:    [[BF_LOAD32:%.*]] = load i24, ptr [[E31]], align 1
+// CHECK-NEXT:    [[BF_SHL33:%.*]] = shl i24 [[BF_LOAD32]], 9
+// CHECK-NEXT:    [[BF_ASHR34:%.*]] = ashr i24 [[BF_SHL33]], 9
+// CHECK-NEXT:    [[BF_CAST35:%.*]] = sext i24 [[BF_ASHR34]] to i32
+// CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[BF_CAST35]] to i24
+// CHECK-NEXT:    [[BF_LOAD36:%.*]] = load i24, ptr [[E11]], align 1
+// CHECK-NEXT:    [[BF_VALUE37:%.*]] = and i24 [[TMP7]], 32767
+// CHECK-NEXT:    [[BF_CLEAR38:%.*]] = and i24 [[BF_LOAD36]], -32768
+// CHECK-NEXT:    [[BF_SET39:%.*]] = or i24 [[BF_CLEAR38]], [[BF_VALUE37]]
+// CHECK-NEXT:    store i24 [[BF_SET39]], ptr [[E11]], align 1
+// CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[B]], i32 0, i32 2
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP41]], ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT:    [[GEP42:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP40]], i32 0
+// CHECK-NEXT:    [[E43:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP42]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP44:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP40]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP45:%.*]] = getelementptr inbounds [[STRUCT_BFIELDS]], ptr [[REF_TMP40]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP46:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP41]], i32 0, i32 0
+// CHECK-NEXT:    [[E47:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[GEP46]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP48:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP41]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP49:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP41]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP50:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP41]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[GEP48]], align 8
+// CHECK-NEXT:    store double [[TMP8]], ptr [[GEP44]], align 8
+// CHECK-NEXT:    [[BF_LOAD51:%.*]] = load i24, ptr [[E47]], align 1
+// CHECK-NEXT:    [[BF_SHL52:%.*]] = shl i24 [[BF_LOAD51]], 9
+// CHECK-NEXT:    [[BF_ASHR53:%.*]] = ashr i24 [[BF_SHL52]], 9
+// CHECK-NEXT:    [[BF_CAST54:%.*]] = sext i24 [[BF_ASHR53]] to i32
+// CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[BF_CAST54]] to i24
+// CHECK-NEXT:    [[BF_LOAD55:%.*]] = load i24, ptr [[E43]], align 1
+// CHECK-NEXT:    [[BF_VALUE56:%.*]] = and i24 [[TMP9]], 32767
+// CHECK-NEXT:    [[BF_CLEAR57:%.*]] = and i24 [[BF_LOAD55]], -32768
+// CHECK-NEXT:    [[BF_SET58:%.*]] = or i24 [[BF_CLEAR57]], [[BF_VALUE56]]
+// CHECK-NEXT:    store i24 [[BF_SET58]], ptr [[E43]], align 1
+// CHECK-NEXT:    [[TMP10:%.*]] = load float, ptr [[GEP49]], align 4
+// CHECK-NEXT:    store float [[TMP10]], ptr [[GEP45]], align 4
+// CHECK-NEXT:    [[F59:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS]], ptr [[REF_TMP40]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[F59]], align 1
+// CHECK-NEXT:    store float [[TMP11]], ptr [[F]], align 1
+// CHECK-NEXT:    ret void
+//
+void call11(Derived D) {
   BFields B = (BFields)D;
 }
 
@@ -235,13 +540,16 @@ struct Empty {
 };
 
 // cast to an empty struct
-// CHECK-LABEL: call12
-// CHECK: [[I:%.*]] = alloca <4 x i32>, align 4
-// CHECK-NEXT: [[E:%.*]] = alloca %struct.Empty, align 1
-// CHECK-NEXT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr [[I]], align 4
-// CHECK-NEXT: [[A:%.*]] = load <4 x i32>, ptr [[I]], align 4
-// CHECK-NEXt: ret void
-export void call12() {
+// CHECK-LABEL: define hidden void @_Z6call12v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I:%.*]] = alloca <4 x i32>, align 4
+// CHECK-NEXT:    [[E:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
+// CHECK-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr [[I]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[E]], ptr align 1 @__const._Z6call12v.E, i32 1, i1 false)
+// CHECK-NEXT:    ret void
+//
+void call12() {
   int4 I = {1,2,3,4};
   Empty E = (Empty)I;
 }
@@ -259,55 +567,313 @@ struct MoreBFields {
 };
 
 // more complicated bitfield case
-// CHECK-LABEL: call13
-// CHECK: [[AA:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[MBF:%.*]] = alloca %struct.MoreBFields, align 1
-// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4
-// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[AA]], align 4
 // get the gep for the struct.
-// CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0
-// CHECK-NEXT: [[FieldB:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 1
 // D and E share the same field index
-// CHECK-NEXT: [[FieldD:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 3
-// CHECK-NEXT: [[FieldE:%.*]] = getelementptr inbounds nuw %struct.MoreBFields, ptr [[Gep]], i32 0, i32 3
-// CHECK-NEXT: [[FieldA:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 0
-// CHECK-NEXT: [[FieldC:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 2
-// CHECK-NEXT: [[FieldF:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 5
-// CHECK-NEXT: [[FieldG:%.*]] = getelementptr inbounds %struct.MoreBFields, ptr [[MBF]], i32 0, i32 7
 // store int A into field A
-// CHECK-NEXT: store i32 [[Z]], ptr [[FieldA]], align 4
 // store int A in bitField B, do necessary conversions
-// CHECK-NEXT: [[Conv:%.*]] = sext i32 [[Z]] to i64
-// CHECK-NEXT: [[BFL:%.*]] = load i64, ptr [[FieldB]], align 1
-// CHECK-NEXT: [[BFV:%.*]] = and i64 [[Conv]], 1152921504606846975
-// CHECK-NEXT: [[BFC:%.*]] = and i64 [[BFL]], -1152921504606846976
-// CHECK-NEXT: [[BFS:%.*]] = or i64 [[BFC]], [[BFV]]
-// CHECK-NEXT: store i64 [[BFS]], ptr [[FieldB]], align 1
 // store int A into field C
-// CHECK-NEXT: [[Conv5:%.*]] = sitofp i32 [[Z]] to float
-// CHECK-NEXT: store float [[Conv5]], ptr [[FieldC]], align 4
 // store int A into bitfield D
-// CHECK-NEXT: [[Conv6:%.*]] = trunc i32 [[Z]] to i16
-// CHECK-NEXT: [[FDL:%.*]] = load i16, ptr [[FieldD]], align 1
-// CHECK-NEXT: [[FDV:%.*]] = and i16 [[Conv6]], 1023
-// CHECK-NEXT: [[FDC:%.*]] = and i16 [[FDL]], -1024
-// CHECK-NEXT: [[FDS:%.*]] = or i16 [[FDC]], [[FDV]]
-// CHECK-NEXT: store i16 [[FDS]], ptr [[FieldD]], align 1
 // store int A into bitfield E;
-// CHECK-NEXT: [[Conv11:%.*]] = trunc i32 [[Z]] to i16
-// CHECK-NEXT: [[FEL:%.*]] = load i16, ptr [[FieldE]], align 1
-// CHECK-NEXT: [[FEV:%.*]] = and i16 [[Conv11]], 63
-// CHECK-NEXT: [[FESHL:%.*]] = shl i16 [[FEV]], 10
-// CHECK-NEXT: [[FEC:%.*]] = and i16 [[FEL]], 1023
-// CHECK-NEXT: [[FES:%.*]] = or i16 [[FEC]], [[FESHL]]
-// CHECK-NEXT: store i16 [[FES]], ptr [[FieldE]], align 1
 // store int A into field F
-// CHECK-NEXT: [[Conv16:%.*]] = sitofp i32 [[Z]] to double
-// CHECK-NEXT: store double [[Conv16]], ptr [[FieldF]], align 8
 // store int A into field G
-// CHECK-NEXT: store i32 [[Z]], ptr [[FieldG]], align 4
-// CHECK-NEXT: ret void
-export void call13(int A) {
+// CHECK-LABEL: define hidden void @_Z6call13i(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[MBF:%.*]] = alloca [[STRUCT_MOREBFIELDS:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_MOREBFIELDS]], align 1
+// CHECK-NEXT:    [[REF_TMP20:%.*]] = alloca [[STRUCT_MOREBFIELDS]], align 1
+// CHECK-NEXT:    [[REF_TMP54:%.*]] = alloca [[STRUCT_MOREBFIELDS]], align 1
+// CHECK-NEXT:    [[REF_TMP83:%.*]] = alloca [[STRUCT_MOREBFIELDS]], align 1
+// CHECK-NEXT:    [[REF_TMP118:%.*]] = alloca [[STRUCT_MOREBFIELDS]], align 1
+// CHECK-NEXT:    [[REF_TMP152:%.*]] = alloca [[STRUCT_MOREBFIELDS]], align 1
+// CHECK-NEXT:    [[REF_TMP180:%.*]] = alloca [[STRUCT_MOREBFIELDS]], align 1
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[MBF]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP]], i32 0
+// CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP]], i32 0, i32 1
+// CHECK-NEXT:    [[D:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP]], i32 0, i32 3
+// CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP]], i32 0, i32 3
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP]], i32 0, i32 5
+// CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP2]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[BF_LOAD:%.*]] = load i64, ptr [[B]], align 1
+// CHECK-NEXT:    [[BF_VALUE:%.*]] = and i64 [[CONV]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK-NEXT:    store i64 [[BF_SET]], ptr [[B]], align 1
+// CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT:    store float [[CONV6]], ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[CONV7:%.*]] = trunc i32 [[TMP0]] to i16
+// CHECK-NEXT:    [[BF_LOAD8:%.*]] = load i16, ptr [[D]], align 1
+// CHECK-NEXT:    [[BF_VALUE9:%.*]] = and i16 [[CONV7]], 1023
+// CHECK-NEXT:    [[BF_CLEAR10:%.*]] = and i16 [[BF_LOAD8]], -1024
+// CHECK-NEXT:    [[BF_SET11:%.*]] = or i16 [[BF_CLEAR10]], [[BF_VALUE9]]
+// CHECK-NEXT:    store i16 [[BF_SET11]], ptr [[D]], align 1
+// CHECK-NEXT:    [[CONV12:%.*]] = trunc i32 [[TMP0]] to i16
+// CHECK-NEXT:    [[BF_LOAD13:%.*]] = load i16, ptr [[E]], align 1
+// CHECK-NEXT:    [[BF_VALUE14:%.*]] = and i16 [[CONV12]], 63
+// CHECK-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_VALUE14]], 10
+// CHECK-NEXT:    [[BF_CLEAR15:%.*]] = and i16 [[BF_LOAD13]], 1023
+// CHECK-NEXT:    [[BF_SET16:%.*]] = or i16 [[BF_CLEAR15]], [[BF_SHL]]
+// CHECK-NEXT:    store i16 [[BF_SET16]], ptr [[E]], align 1
+// CHECK-NEXT:    [[CONV17:%.*]] = sitofp i32 [[TMP0]] to double
+// CHECK-NEXT:    store double [[CONV17]], ptr [[GEP4]], align 8
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP5]], align 4
+// CHECK-NEXT:    [[A18:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A18]], align 1
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[A1]], align 1
+// CHECK-NEXT:    [[B19:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[MBF]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP21:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP20]], i32 0
+// CHECK-NEXT:    [[B22:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP21]], i32 0, i32 1
+// CHECK-NEXT:    [[D23:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP21]], i32 0, i32 3
+// CHECK-NEXT:    [[E24:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP21]], i32 0, i32 3
+// CHECK-NEXT:    [[GEP25:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP20]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP26:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP20]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP27:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP20]], i32 0, i32 5
+// CHECK-NEXT:    [[GEP28:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP20]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[GEP25]], align 4
+// CHECK-NEXT:    [[CONV29:%.*]] = sext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[BF_LOAD30:%.*]] = load i64, ptr [[B22]], align 1
+// CHECK-NEXT:    [[BF_VALUE31:%.*]] = and i64 [[CONV29]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR32:%.*]] = and i64 [[BF_LOAD30]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET33:%.*]] = or i64 [[BF_CLEAR32]], [[BF_VALUE31]]
+// CHECK-NEXT:    store i64 [[BF_SET33]], ptr [[B22]], align 1
+// CHECK-NEXT:    [[CONV34:%.*]] = sitofp i32 [[TMP2]] to float
+// CHECK-NEXT:    store float [[CONV34]], ptr [[GEP26]], align 4
+// CHECK-NEXT:    [[CONV35:%.*]] = trunc i32 [[TMP2]] to i16
+// CHECK-NEXT:    [[BF_LOAD36:%.*]] = load i16, ptr [[D23]], align 1
+// CHECK-NEXT:    [[BF_VALUE37:%.*]] = and i16 [[CONV35]], 1023
+// CHECK-NEXT:    [[BF_CLEAR38:%.*]] = and i16 [[BF_LOAD36]], -1024
+// CHECK-NEXT:    [[BF_SET39:%.*]] = or i16 [[BF_CLEAR38]], [[BF_VALUE37]]
+// CHECK-NEXT:    store i16 [[BF_SET39]], ptr [[D23]], align 1
+// CHECK-NEXT:    [[CONV40:%.*]] = trunc i32 [[TMP2]] to i16
+// CHECK-NEXT:    [[BF_LOAD41:%.*]] = load i16, ptr [[E24]], align 1
+// CHECK-NEXT:    [[BF_VALUE42:%.*]] = and i16 [[CONV40]], 63
+// CHECK-NEXT:    [[BF_SHL43:%.*]] = shl i16 [[BF_VALUE42]], 10
+// CHECK-NEXT:    [[BF_CLEAR44:%.*]] = and i16 [[BF_LOAD41]], 1023
+// CHECK-NEXT:    [[BF_SET45:%.*]] = or i16 [[BF_CLEAR44]], [[BF_SHL43]]
+// CHECK-NEXT:    store i16 [[BF_SET45]], ptr [[E24]], align 1
+// CHECK-NEXT:    [[CONV46:%.*]] = sitofp i32 [[TMP2]] to double
+// CHECK-NEXT:    store double [[CONV46]], ptr [[GEP27]], align 8
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[GEP28]], align 4
+// CHECK-NEXT:    [[B47:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP20]], i32 0, i32 1
+// CHECK-NEXT:    [[BF_LOAD48:%.*]] = load i64, ptr [[B47]], align 1
+// CHECK-NEXT:    [[BF_CLEAR49:%.*]] = and i64 [[BF_LOAD48]], 1152921504606846975
+// CHECK-NEXT:    [[BF_LOAD50:%.*]] = load i64, ptr [[B19]], align 1
+// CHECK-NEXT:    [[BF_VALUE51:%.*]] = and i64 [[BF_CLEAR49]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR52:%.*]] = and i64 [[BF_LOAD50]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET53:%.*]] = or i64 [[BF_CLEAR52]], [[BF_VALUE51]]
+// CHECK-NEXT:    store i64 [[BF_SET53]], ptr [[B19]], align 1
+// CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[MBF]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP55:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP54]], i32 0
+// CHECK-NEXT:    [[B56:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP55]], i32 0, i32 1
+// CHECK-NEXT:    [[D57:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP55]], i32 0, i32 3
+// CHECK-NEXT:    [[E58:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP55]], i32 0, i32 3
+// CHECK-NEXT:    [[GEP59:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP54]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP60:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP54]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP61:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP54]], i32 0, i32 5
+// CHECK-NEXT:    [[GEP62:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP54]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP59]], align 4
+// CHECK-NEXT:    [[CONV63:%.*]] = sext i32 [[TMP3]] to i64
+// CHECK-NEXT:    [[BF_LOAD64:%.*]] = load i64, ptr [[B56]], align 1
+// CHECK-NEXT:    [[BF_VALUE65:%.*]] = and i64 [[CONV63]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR66:%.*]] = and i64 [[BF_LOAD64]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET67:%.*]] = or i64 [[BF_CLEAR66]], [[BF_VALUE65]]
+// CHECK-NEXT:    store i64 [[BF_SET67]], ptr [[B56]], align 1
+// CHECK-NEXT:    [[CONV68:%.*]] = sitofp i32 [[TMP3]] to float
+// CHECK-NEXT:    store float [[CONV68]], ptr [[GEP60]], align 4
+// CHECK-NEXT:    [[CONV69:%.*]] = trunc i32 [[TMP3]] to i16
+// CHECK-NEXT:    [[BF_LOAD70:%.*]] = load i16, ptr [[D57]], align 1
+// CHECK-NEXT:    [[BF_VALUE71:%.*]] = and i16 [[CONV69]], 1023
+// CHECK-NEXT:    [[BF_CLEAR72:%.*]] = and i16 [[BF_LOAD70]], -1024
+// CHECK-NEXT:    [[BF_SET73:%.*]] = or i16 [[BF_CLEAR72]], [[BF_VALUE71]]
+// CHECK-NEXT:    store i16 [[BF_SET73]], ptr [[D57]], align 1
+// CHECK-NEXT:    [[CONV74:%.*]] = trunc i32 [[TMP3]] to i16
+// CHECK-NEXT:    [[BF_LOAD75:%.*]] = load i16, ptr [[E58]], align 1
+// CHECK-NEXT:    [[BF_VALUE76:%.*]] = and i16 [[CONV74]], 63
+// CHECK-NEXT:    [[BF_SHL77:%.*]] = shl i16 [[BF_VALUE76]], 10
+// CHECK-NEXT:    [[BF_CLEAR78:%.*]] = and i16 [[BF_LOAD75]], 1023
+// CHECK-NEXT:    [[BF_SET79:%.*]] = or i16 [[BF_CLEAR78]], [[BF_SHL77]]
+// CHECK-NEXT:    store i16 [[BF_SET79]], ptr [[E58]], align 1
+// CHECK-NEXT:    [[CONV80:%.*]] = sitofp i32 [[TMP3]] to double
+// CHECK-NEXT:    store double [[CONV80]], ptr [[GEP61]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP62]], align 4
+// CHECK-NEXT:    [[C81:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP54]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[C81]], align 1
+// CHECK-NEXT:    store float [[TMP4]], ptr [[C]], align 1
+// CHECK-NEXT:    [[D82:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[MBF]], i32 0, i32 3
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP84:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP83]], i32 0
+// CHECK-NEXT:    [[B85:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP84]], i32 0, i32 1
+// CHECK-NEXT:    [[D86:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP84]], i32 0, i32 3
+// CHECK-NEXT:    [[E87:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP84]], i32 0, i32 3
+// CHECK-NEXT:    [[GEP88:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP83]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP89:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP83]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP90:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP83]], i32 0, i32 5
+// CHECK-NEXT:    [[GEP91:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP83]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP5]], ptr [[GEP88]], align 4
+// CHECK-NEXT:    [[CONV92:%.*]] = sext i32 [[TMP5]] to i64
+// CHECK-NEXT:    [[BF_LOAD93:%.*]] = load i64, ptr [[B85]], align 1
+// CHECK-NEXT:    [[BF_VALUE94:%.*]] = and i64 [[CONV92]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR95:%.*]] = and i64 [[BF_LOAD93]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET96:%.*]] = or i64 [[BF_CLEAR95]], [[BF_VALUE94]]
+// CHECK-NEXT:    store i64 [[BF_SET96]], ptr [[B85]], align 1
+// CHECK-NEXT:    [[CONV97:%.*]] = sitofp i32 [[TMP5]] to float
+// CHECK-NEXT:    store float [[CONV97]], ptr [[GEP89]], align 4
+// CHECK-NEXT:    [[CONV98:%.*]] = trunc i32 [[TMP5]] to i16
+// CHECK-NEXT:    [[BF_LOAD99:%.*]] = load i16, ptr [[D86]], align 1
+// CHECK-NEXT:    [[BF_VALUE100:%.*]] = and i16 [[CONV98]], 1023
+// CHECK-NEXT:    [[BF_CLEAR101:%.*]] = and i16 [[BF_LOAD99]], -1024
+// CHECK-NEXT:    [[BF_SET102:%.*]] = or i16 [[BF_CLEAR101]], [[BF_VALUE100]]
+// CHECK-NEXT:    store i16 [[BF_SET102]], ptr [[D86]], align 1
+// CHECK-NEXT:    [[CONV103:%.*]] = trunc i32 [[TMP5]] to i16
+// CHECK-NEXT:    [[BF_LOAD104:%.*]] = load i16, ptr [[E87]], align 1
+// CHECK-NEXT:    [[BF_VALUE105:%.*]] = and i16 [[CONV103]], 63
+// CHECK-NEXT:    [[BF_SHL106:%.*]] = shl i16 [[BF_VALUE105]], 10
+// CHECK-NEXT:    [[BF_CLEAR107:%.*]] = and i16 [[BF_LOAD104]], 1023
+// CHECK-NEXT:    [[BF_SET108:%.*]] = or i16 [[BF_CLEAR107]], [[BF_SHL106]]
+// CHECK-NEXT:    store i16 [[BF_SET108]], ptr [[E87]], align 1
+// CHECK-NEXT:    [[CONV109:%.*]] = sitofp i32 [[TMP5]] to double
+// CHECK-NEXT:    store double [[CONV109]], ptr [[GEP90]], align 8
+// CHECK-NEXT:    store i32 [[TMP5]], ptr [[GEP91]], align 4
+// CHECK-NEXT:    [[D110:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP83]], i32 0, i32 3
+// CHECK-NEXT:    [[BF_LOAD111:%.*]] = load i16, ptr [[D110]], align 1
+// CHECK-NEXT:    [[BF_CLEAR112:%.*]] = and i16 [[BF_LOAD111]], 1023
+// CHECK-NEXT:    [[BF_LOAD113:%.*]] = load i16, ptr [[D82]], align 1
+// CHECK-NEXT:    [[BF_VALUE114:%.*]] = and i16 [[BF_CLEAR112]], 1023
+// CHECK-NEXT:    [[BF_CLEAR115:%.*]] = and i16 [[BF_LOAD113]], -1024
+// CHECK-NEXT:    [[BF_SET116:%.*]] = or i16 [[BF_CLEAR115]], [[BF_VALUE114]]
+// CHECK-NEXT:    store i16 [[BF_SET116]], ptr [[D82]], align 1
+// CHECK-NEXT:    [[E117:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[MBF]], i32 0, i32 3
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP119:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP118]], i32 0
+// CHECK-NEXT:    [[B120:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP119]], i32 0, i32 1
+// CHECK-NEXT:    [[D121:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP119]], i32 0, i32 3
+// CHECK-NEXT:    [[E122:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP119]], i32 0, i32 3
+// CHECK-NEXT:    [[GEP123:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP118]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP124:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP118]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP125:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP118]], i32 0, i32 5
+// CHECK-NEXT:    [[GEP126:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP118]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP6]], ptr [[GEP123]], align 4
+// CHECK-NEXT:    [[CONV127:%.*]] = sext i32 [[TMP6]] to i64
+// CHECK-NEXT:    [[BF_LOAD128:%.*]] = load i64, ptr [[B120]], align 1
+// CHECK-NEXT:    [[BF_VALUE129:%.*]] = and i64 [[CONV127]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR130:%.*]] = and i64 [[BF_LOAD128]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET131:%.*]] = or i64 [[BF_CLEAR130]], [[BF_VALUE129]]
+// CHECK-NEXT:    store i64 [[BF_SET131]], ptr [[B120]], align 1
+// CHECK-NEXT:    [[CONV132:%.*]] = sitofp i32 [[TMP6]] to float
+// CHECK-NEXT:    store float [[CONV132]], ptr [[GEP124]], align 4
+// CHECK-NEXT:    [[CONV133:%.*]] = trunc i32 [[TMP6]] to i16
+// CHECK-NEXT:    [[BF_LOAD134:%.*]] = load i16, ptr [[D121]], align 1
+// CHECK-NEXT:    [[BF_VALUE135:%.*]] = and i16 [[CONV133]], 1023
+// CHECK-NEXT:    [[BF_CLEAR136:%.*]] = and i16 [[BF_LOAD134]], -1024
+// CHECK-NEXT:    [[BF_SET137:%.*]] = or i16 [[BF_CLEAR136]], [[BF_VALUE135]]
+// CHECK-NEXT:    store i16 [[BF_SET137]], ptr [[D121]], align 1
+// CHECK-NEXT:    [[CONV138:%.*]] = trunc i32 [[TMP6]] to i16
+// CHECK-NEXT:    [[BF_LOAD139:%.*]] = load i16, ptr [[E122]], align 1
+// CHECK-NEXT:    [[BF_VALUE140:%.*]] = and i16 [[CONV138]], 63
+// CHECK-NEXT:    [[BF_SHL141:%.*]] = shl i16 [[BF_VALUE140]], 10
+// CHECK-NEXT:    [[BF_CLEAR142:%.*]] = and i16 [[BF_LOAD139]], 1023
+// CHECK-NEXT:    [[BF_SET143:%.*]] = or i16 [[BF_CLEAR142]], [[BF_SHL141]]
+// CHECK-NEXT:    store i16 [[BF_SET143]], ptr [[E122]], align 1
+// CHECK-NEXT:    [[CONV144:%.*]] = sitofp i32 [[TMP6]] to double
+// CHECK-NEXT:    store double [[CONV144]], ptr [[GEP125]], align 8
+// CHECK-NEXT:    store i32 [[TMP6]], ptr [[GEP126]], align 4
+// CHECK-NEXT:    [[E145:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP118]], i32 0, i32 3
+// CHECK-NEXT:    [[BF_LOAD146:%.*]] = load i16, ptr [[E145]], align 1
+// CHECK-NEXT:    [[BF_LSHR:%.*]] = lshr i16 [[BF_LOAD146]], 10
+// CHECK-NEXT:    [[BF_LOAD147:%.*]] = load i16, ptr [[E117]], align 1
+// CHECK-NEXT:    [[BF_VALUE148:%.*]] = and i16 [[BF_LSHR]], 63
+// CHECK-NEXT:    [[BF_SHL149:%.*]] = shl i16 [[BF_VALUE148]], 10
+// CHECK-NEXT:    [[BF_CLEAR150:%.*]] = and i16 [[BF_LOAD147]], 1023
+// CHECK-NEXT:    [[BF_SET151:%.*]] = or i16 [[BF_CLEAR150]], [[BF_SHL149]]
+// CHECK-NEXT:    store i16 [[BF_SET151]], ptr [[E117]], align 1
+// CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[MBF]], i32 0, i32 5
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP153:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP152]], i32 0
+// CHECK-NEXT:    [[B154:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP153]], i32 0, i32 1
+// CHECK-NEXT:    [[D155:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP153]], i32 0, i32 3
+// CHECK-NEXT:    [[E156:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP153]], i32 0, i32 3
+// CHECK-NEXT:    [[GEP157:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP152]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP158:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP152]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP159:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP152]], i32 0, i32 5
+// CHECK-NEXT:    [[GEP160:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP152]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP7]], ptr [[GEP157]], align 4
+// CHECK-NEXT:    [[CONV161:%.*]] = sext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[BF_LOAD162:%.*]] = load i64, ptr [[B154]], align 1
+// CHECK-NEXT:    [[BF_VALUE163:%.*]] = and i64 [[CONV161]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR164:%.*]] = and i64 [[BF_LOAD162]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET165:%.*]] = or i64 [[BF_CLEAR164]], [[BF_VALUE163]]
+// CHECK-NEXT:    store i64 [[BF_SET165]], ptr [[B154]], align 1
+// CHECK-NEXT:    [[CONV166:%.*]] = sitofp i32 [[TMP7]] to float
+// CHECK-NEXT:    store float [[CONV166]], ptr [[GEP158]], align 4
+// CHECK-NEXT:    [[CONV167:%.*]] = trunc i32 [[TMP7]] to i16
+// CHECK-NEXT:    [[BF_LOAD168:%.*]] = load i16, ptr [[D155]], align 1
+// CHECK-NEXT:    [[BF_VALUE169:%.*]] = and i16 [[CONV167]], 1023
+// CHECK-NEXT:    [[BF_CLEAR170:%.*]] = and i16 [[BF_LOAD168]], -1024
+// CHECK-NEXT:    [[BF_SET171:%.*]] = or i16 [[BF_CLEAR170]], [[BF_VALUE169]]
+// CHECK-NEXT:    store i16 [[BF_SET171]], ptr [[D155]], align 1
+// CHECK-NEXT:    [[CONV172:%.*]] = trunc i32 [[TMP7]] to i16
+// CHECK-NEXT:    [[BF_LOAD173:%.*]] = load i16, ptr [[E156]], align 1
+// CHECK-NEXT:    [[BF_VALUE174:%.*]] = and i16 [[CONV172]], 63
+// CHECK-NEXT:    [[BF_SHL175:%.*]] = shl i16 [[BF_VALUE174]], 10
+// CHECK-NEXT:    [[BF_CLEAR176:%.*]] = and i16 [[BF_LOAD173]], 1023
+// CHECK-NEXT:    [[BF_SET177:%.*]] = or i16 [[BF_CLEAR176]], [[BF_SHL175]]
+// CHECK-NEXT:    store i16 [[BF_SET177]], ptr [[E156]], align 1
+// CHECK-NEXT:    [[CONV178:%.*]] = sitofp i32 [[TMP7]] to double
+// CHECK-NEXT:    store double [[CONV178]], ptr [[GEP159]], align 8
+// CHECK-NEXT:    store i32 [[TMP7]], ptr [[GEP160]], align 4
+// CHECK-NEXT:    [[F179:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP152]], i32 0, i32 5
+// CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[F179]], align 1
+// CHECK-NEXT:    store double [[TMP8]], ptr [[F]], align 1
+// CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[MBF]], i32 0, i32 7
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP181:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP180]], i32 0
+// CHECK-NEXT:    [[B182:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP181]], i32 0, i32 1
+// CHECK-NEXT:    [[D183:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP181]], i32 0, i32 3
+// CHECK-NEXT:    [[E184:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[GEP181]], i32 0, i32 3
+// CHECK-NEXT:    [[GEP185:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP180]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP186:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP180]], i32 0, i32 2
+// CHECK-NEXT:    [[GEP187:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP180]], i32 0, i32 5
+// CHECK-NEXT:    [[GEP188:%.*]] = getelementptr inbounds [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP180]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP9]], ptr [[GEP185]], align 4
+// CHECK-NEXT:    [[CONV189:%.*]] = sext i32 [[TMP9]] to i64
+// CHECK-NEXT:    [[BF_LOAD190:%.*]] = load i64, ptr [[B182]], align 1
+// CHECK-NEXT:    [[BF_VALUE191:%.*]] = and i64 [[CONV189]], 1152921504606846975
+// CHECK-NEXT:    [[BF_CLEAR192:%.*]] = and i64 [[BF_LOAD190]], -1152921504606846976
+// CHECK-NEXT:    [[BF_SET193:%.*]] = or i64 [[BF_CLEAR192]], [[BF_VALUE191]]
+// CHECK-NEXT:    store i64 [[BF_SET193]], ptr [[B182]], align 1
+// CHECK-NEXT:    [[CONV194:%.*]] = sitofp i32 [[TMP9]] to float
+// CHECK-NEXT:    store float [[CONV194]], ptr [[GEP186]], align 4
+// CHECK-NEXT:    [[CONV195:%.*]] = trunc i32 [[TMP9]] to i16
+// CHECK-NEXT:    [[BF_LOAD196:%.*]] = load i16, ptr [[D183]], align 1
+// CHECK-NEXT:    [[BF_VALUE197:%.*]] = and i16 [[CONV195]], 1023
+// CHECK-NEXT:    [[BF_CLEAR198:%.*]] = and i16 [[BF_LOAD196]], -1024
+// CHECK-NEXT:    [[BF_SET199:%.*]] = or i16 [[BF_CLEAR198]], [[BF_VALUE197]]
+// CHECK-NEXT:    store i16 [[BF_SET199]], ptr [[D183]], align 1
+// CHECK-NEXT:    [[CONV200:%.*]] = trunc i32 [[TMP9]] to i16
+// CHECK-NEXT:    [[BF_LOAD201:%.*]] = load i16, ptr [[E184]], align 1
+// CHECK-NEXT:    [[BF_VALUE202:%.*]] = and i16 [[CONV200]], 63
+// CHECK-NEXT:    [[BF_SHL203:%.*]] = shl i16 [[BF_VALUE202]], 10
+// CHECK-NEXT:    [[BF_CLEAR204:%.*]] = and i16 [[BF_LOAD201]], 1023
+// CHECK-NEXT:    [[BF_SET205:%.*]] = or i16 [[BF_CLEAR204]], [[BF_SHL203]]
+// CHECK-NEXT:    store i16 [[BF_SET205]], ptr [[E184]], align 1
+// CHECK-NEXT:    [[CONV206:%.*]] = sitofp i32 [[TMP9]] to double
+// CHECK-NEXT:    store double [[CONV206]], ptr [[GEP187]], align 8
+// CHECK-NEXT:    store i32 [[TMP9]], ptr [[GEP188]], align 4
+// CHECK-NEXT:    [[G207:%.*]] = getelementptr inbounds nuw [[STRUCT_MOREBFIELDS]], ptr [[REF_TMP180]], i32 0, i32 7
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[G207]], align 1
+// CHECK-NEXT:    store i32 [[TMP10]], ptr [[G]], align 1
+// CHECK-NEXT:    ret void
+//
+void call13(int A) {
   MoreBFields MBF = (MoreBFields)A;
 }
 
@@ -322,24 +888,75 @@ struct Outer {
 };
 
 // show usage of "extra" gep for struct containing bitfield
-// CHECK-LABEL: call14
-// CHECK: [[AA:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[O:%.*]] = alloca %struct.Outer, align 1
-// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4
-// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[AA]], align 4
-// CHECK-NEXT: [[FieldA:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 0
 // showing real usage of "extra gep". need Inner struct to generate access of its bitfield.
-// CHECK-NEXT: [[FieldI:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 1
-// CHECK-NEXT: [[FieldY:%.*]] = getelementptr inbounds nuw %struct.Inner, ptr [[FieldI]], i32 0, i32 1
-// CHECK-NEXT: [[FieldZ:%.*]] = getelementptr inbounds %struct.Outer, ptr [[O]], i32 0, i32 1, i32 0
-// CHECK-NEXT: store i32 [[Z]], ptr [[FieldA]], align 4
-// CHECK-NEXT: store i32 [[Z]], ptr [[FieldZ]], align 4
-// CHECK-NEXT: [[BFL:%.*]] = load i32, ptr [[FieldY]], align 1
-// CHECK-NEXT: [[BFV:%.*]] = and i32 [[Z]], 33554431
-// CHECK-NEXT: [[BFC:%.*]] = and i32 [[BFL]], -33554432
-// CHECK-NEXT: [[BFS:%.*]] = or i32 [[BFC]], [[BFV]]
-// CHECK-NEXT: store i32 [[BFS]], ptr [[FieldY]], align 1
-// CHECK-NEXT: ret void
-export void call14(int A) {
+// CHECK-LABEL: define hidden void @_Z6call14i(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[O:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 1
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_OUTER]], align 1
+// CHECK-NEXT:    [[REF_TMP5:%.*]] = alloca [[STRUCT_OUTER]], align 1
+// CHECK-NEXT:    [[REF_TMP17:%.*]] = alloca [[STRUCT_OUTER]], align 1
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[O]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[GEP2]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP]], i32 0, i32 1, i32 0
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[BF_LOAD:%.*]] = load i32, ptr [[Y]], align 1
+// CHECK-NEXT:    [[BF_VALUE:%.*]] = and i32 [[TMP0]], 33554431
+// CHECK-NEXT:    [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -33554432
+// CHECK-NEXT:    [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK-NEXT:    store i32 [[BF_SET]], ptr [[Y]], align 1
+// CHECK-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A4]], align 1
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[A1]], align 1
+// CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[O]], i32 0, i32 1
+// CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[I]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP5]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP5]], i32 0, i32 1
+// CHECK-NEXT:    [[Y8:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[GEP7]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP9:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP5]], i32 0, i32 1, i32 0
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[GEP6]], align 4
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[GEP9]], align 4
+// CHECK-NEXT:    [[BF_LOAD10:%.*]] = load i32, ptr [[Y8]], align 1
+// CHECK-NEXT:    [[BF_VALUE11:%.*]] = and i32 [[TMP2]], 33554431
+// CHECK-NEXT:    [[BF_CLEAR12:%.*]] = and i32 [[BF_LOAD10]], -33554432
+// CHECK-NEXT:    [[BF_SET13:%.*]] = or i32 [[BF_CLEAR12]], [[BF_VALUE11]]
+// CHECK-NEXT:    store i32 [[BF_SET13]], ptr [[Y8]], align 1
+// CHECK-NEXT:    [[I14:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[REF_TMP5]], i32 0, i32 1
+// CHECK-NEXT:    [[Z15:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[I14]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[Z15]], align 1
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[Z]], align 1
+// CHECK-NEXT:    [[Y16:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[I]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[GEP18:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP17]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP19:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP17]], i32 0, i32 1
+// CHECK-NEXT:    [[Y20:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[GEP19]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP21:%.*]] = getelementptr inbounds [[STRUCT_OUTER]], ptr [[REF_TMP17]], i32 0, i32 1, i32 0
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[GEP18]], align 4
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[GEP21]], align 4
+// CHECK-NEXT:    [[BF_LOAD22:%.*]] = load i32, ptr [[Y20]], align 1
+// CHECK-NEXT:    [[BF_VALUE23:%.*]] = and i32 [[TMP4]], 33554431
+// CHECK-NEXT:    [[BF_CLEAR24:%.*]] = and i32 [[BF_LOAD22]], -33554432
+// CHECK-NEXT:    [[BF_SET25:%.*]] = or i32 [[BF_CLEAR24]], [[BF_VALUE23]]
+// CHECK-NEXT:    store i32 [[BF_SET25]], ptr [[Y20]], align 1
+// CHECK-NEXT:    [[I26:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[REF_TMP17]], i32 0, i32 1
+// CHECK-NEXT:    [[Y27:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[I26]], i32 0, i32 1
+// CHECK-NEXT:    [[BF_LOAD28:%.*]] = load i32, ptr [[Y27]], align 1
+// CHECK-NEXT:    [[BF_SHL:%.*]] = shl i32 [[BF_LOAD28]], 7
+// CHECK-NEXT:    [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 7
+// CHECK-NEXT:    [[BF_LOAD29:%.*]] = load i32, ptr [[Y16]], align 1
+// CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i32 [[BF_ASHR]], 33554431
+// CHECK-NEXT:    [[BF_CLEAR31:%.*]] = and i32 [[BF_LOAD29]], -33554432
+// CHECK-NEXT:    [[BF_SET32:%.*]] = or i32 [[BF_CLEAR31]], [[BF_VALUE30]]
+// CHECK-NEXT:    store i32 [[BF_SET32]], ptr [[Y16]], align 1
+// CHECK-NEXT:    ret void
+//
+void call14(int A) {
   Outer O = (Outer)A;
 }
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 15dc2e0ed166b..77185491ca9b6 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -95,9 +95,9 @@ struct Derived : BFields {
 // vector flat cast from derived struct with bitfield
 // CHECK-LABEL: call6
 // CHECK: [[A:%.*]] = alloca <4 x i32>, align 4
-// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Derived, align 1
-// CHECK-NEXT: [[FlatTmp:%.*]] = alloca <4 x i32>, align 4
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
+// CHECK: [[Tmp:%.*]] = alloca %struct.Derived, align 1
+// CHECK: [[FlatTmp:%.*]] = alloca <4 x i32>, align 4
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 %D, i32 19, i1 false)
 // CHECK-NEXT: [[Gep:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw %struct.BFields, ptr [[Gep]], i32 0, i32 1
 // CHECK-NEXT: [[Gep1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[Tmp]], i32 0, i32 0, i32 0
@@ -218,9 +218,9 @@ struct BoolVecStruct {
 // vector flat cast from struct containing bool vector
 // CHECK-LABEL: call10
 // CHECK:    [[V:%.*]] = alloca <2 x i32>, align 4
-// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
-// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 4
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false)
+// CHECK:    [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
+// CHECK:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false)
 // CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr inbounds %struct.BoolVecStruct, ptr [[AGG_TEMP]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 4
diff --git a/clang/test/CodeGenHLSL/BoolMatrix.hlsl b/clang/test/CodeGenHLSL/BoolMatrix.hlsl
index e9841ae6c9a90..8b6c1493d6e07 100644
--- a/clang/test/CodeGenHLSL/BoolMatrix.hlsl
+++ b/clang/test/CodeGenHLSL/BoolMatrix.hlsl
@@ -75,9 +75,7 @@ bool fn3() {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca i1, align 4
 // CHECK-NEXT:    [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
-// CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
-// CHECK-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[ARR]], i32 1
-// CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z3fn4v.Arr, i32 8, i1 false)
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
 // CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
@@ -129,9 +127,7 @@ void fn6() {
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
-// CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
-// CHECK-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[ARR]], i32 1
-// CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARR]], ptr align 4 @__const._Z3fn7v.Arr, i32 8, i1 false)
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[ARRAYIDX]], i32 0, i32 1
 // CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
diff --git a/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl b/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
index 0eb36ce8fb7bb..e2ac5e15086cc 100644
--- a/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/hlsl_resource_t.hlsl
@@ -11,9 +11,9 @@ struct CustomResource {
 // CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", %struct.MyStruct, 0, 0)
 // CHECK: %struct.MyStruct = type { <4 x float>, <2 x i32> }
 
-// CHECK: define hidden void @_Z2fa14CustomResource(ptr noundef byval(%struct.CustomResource) align 1 %a)
-// CHECK: call void @_Z4foo114CustomResource(ptr noundef byval(%struct.CustomResource) align 1 %agg.tmp)
-// CHECK: declare hidden void @_Z4foo114CustomResource(ptr noundef byval(%struct.CustomResource) align 1)
+// CHECK: define hidden void @_Z2fa14CustomResource(ptr noundef dead_on_return %a)
+// CHECK: call void @_Z4foo114CustomResource(ptr noundef dead_on_return %agg.tmp)
+// CHECK: declare hidden void @_Z4foo114CustomResource(ptr noundef dead_on_return)
 
 void foo1(CustomResource res);
 
@@ -21,7 +21,7 @@ void fa(CustomResource a) {
     foo1(a);
 }
 
-// CHECK: define hidden void @_Z2fb14CustomResource(ptr noundef byval(%struct.CustomResource) align 1 %a)
+// CHECK: define hidden void @_Z2fb14CustomResource(ptr noundef dead_on_return %a)
 void fb(CustomResource a) {
     CustomResource b = a;
 }
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
index 2e6a7ef86c610..3128aa2193954 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
@@ -34,18 +34,17 @@ void main(unsigned GI : SV_GroupIndex) {
   Out2[GI] = In[GI];
 #endif
 
-  // For SPIR-V, the addrspacecast comes from `S::operator=` member function, which expects
-  // parameters in address space 0. This is why hlsl_device is a sub address
-  // space of the default address space.
-  // SPV: %[[INPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
-  // SPV: %[[INCAST:.*]] = addrspacecast ptr addrspace(11) %[[INPTR]] to ptr
   // SPV: %[[OUTPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
-  // SPV: %[[OUTCAST:.*]] = addrspacecast ptr addrspace(11) %[[OUTPTR]] to ptr
-  // SPV: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[OUTCAST]], ptr align 1 %[[INCAST]], i64 4, i1 false)
+  // SPV: %[[INPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
+  // SPV: call void @llvm.memcpy.p0.p11.i64(ptr align 1 %ref.tmp.i, ptr addrspace(11) align 1 %[[INPTR]], i64 4, i1 false)
+  // SPV: %[[L56:.*]] = load float, ptr %ref.tmp.i, align 1
+  // SPV: store float %[[L56]], ptr addrspace(11) %[[OUTPTR]]
 
   // For DXIL, hlsl_device and the default address space map to the same target address space. No need for an address space cast.
-  // DXIL: %[[INPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
   // DXIL: %[[OUTPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
-  // DXIL: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %[[OUTPTR]], ptr align 1 %[[INPTR]], i32 4, i1 false)
+  // DXIL: %[[INPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
+  // DXIL: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %ref.tmp.i, ptr align 1 %[[INPTR]], i32 4, i1 false)
+  // DXIL: %[[L70:.*]] = load float, ptr %ref.tmp.i, align 1
+  // DXIL: store float %[[L70]], ptr %[[OUTPTR]], align 1
   RWSB3[0] = RWSB3[1];
 }
diff --git a/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl b/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl
index 2f8dc97ef762e..e50044811017a 100644
--- a/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl
+++ b/clang/test/CodeGenHLSL/semantics/semantic-struct-2-output.hlsl
@@ -14,8 +14,8 @@ struct Output {
 
 // Make sure SV_DispatchThreadID translated into dx.thread.id.
 
-// CHECK-DX: define hidden void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef byval(%struct.Input) align 1 %input)
-// CHECK-VK: define hidden spir_func void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef byval(%struct.Input) align 1 %input)
+// CHECK-DX: define hidden void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef dead_on_return %input)
+// CHECK-VK: define hidden spir_func void @_Z3foo5Input(ptr dead_on_unwind noalias writable sret(%struct.Output) align 1 %agg.result, ptr noundef dead_on_return %input)
 
 // CHECK: %Idx = getelementptr inbounds nuw %struct.Input, ptr %input, i32 0, i32 0
 // CHECK: %[[#tmp:]] = load float, ptr %Idx, align 1
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index a2df307038774..b7babc99ecc7f 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -9,7 +9,7 @@ struct Pair {
       return this.First;
   }
   int getSecond() {
-    this = Pair();
+    this = {0,0};
     return Second;
   }
   void operator=(Pair P) {
@@ -32,14 +32,11 @@ void main() {
 // CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 1
 // CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
 // CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
-// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 0
-// CHECK-NEXT:store i32 5, ptr %First, align 1
-// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 1
-// CHECK-NEXT:store i32 10, ptr %Second, align 1
+// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %Another, ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
 // CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %agg.tmp, ptr align 1 %Another, i32 8, i1 false)
 // CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 1 %agg.tmp)
-// CHECK-NEXT:%First2 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
-// CHECK-NEXT:%0 = load i32, ptr %First2, align 1
+// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
+// CHECK-NEXT:%0 = load i32, ptr %First, align 1
 // CHECK-NEXT:ret i32 %0
 
 // CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
@@ -48,8 +45,12 @@ void main() {
 // CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 1
 // CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
 // CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
-// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 1 %agg.tmp, i8 0, i32 8, i1 false)
+// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %agg.tmp, i32 0, i32 0
+// CHECK-NEXT:store i32 0, ptr %First, align 1
+// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %agg.tmp, i32 0, i32 1
+// CHECK-NEXT:store i32 0, ptr %Second, align 1
+
 // CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 1 %agg.tmp)
-// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
-// CHECK-NEXT:%0 = load i32, ptr %Second, align 1
+// CHECK-NEXT:%Second2 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
+// CHECK-NEXT:%0 = load i32, ptr %Second2, align 1
 // CHECK-NEXT:ret i32 %0
diff --git a/clang/test/CodeGenHLSL/this-assignment.hlsl b/clang/test/CodeGenHLSL/this-assignment.hlsl
index 6efae95fdbb75..51041dd087944 100644
--- a/clang/test/CodeGenHLSL/this-assignment.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment.hlsl
@@ -6,14 +6,12 @@ struct Pair {
 
   int getFirst() {
     Pair Another = {5, 10};
-    Pair B = Another;
     this = Another;
     return this.First;
   }
 
-  // In HLSL 202x, this is a move assignment rather than a copy.
   int getSecond() {
-//    this = Pair();
+    this = {0, 0};
     return Second;
   }
 
@@ -41,27 +39,58 @@ void main() {
 // CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
 // CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
 // CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %Another, ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
-// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %this1, ptr align 1 %Another, i32 8, i1 false)
 // CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
+// CHECK-NEXT:%First2 = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 0
+// CHECK-NEXT:%0 = load i32, ptr %First2, align 1
+// CHECK-NEXT:store i32 %0, ptr %First, align 1
+// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
+// CHECK-NEXT:%Second3 = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 1
+// CHECK-NEXT:%1 = load i32, ptr %Second3, align 1
+// CHECK-NEXT:store i32 %1, ptr %Second, align 1
+// CHECK-NEXT:%First4 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
+// CHECK-NEXT:%2 = load i32, ptr %First4, align 1
+// CHECK-NEXT:ret i32 %2
 
 // CHECK-LABEL:     define {{.*}}getSecond
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
-// CHECK-NEXT:%ref.tmp = alloca %struct.Pair, align 1
 // CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
 // CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
-// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 1 %ref.tmp, i8 0, i32 8, i1 false)
-// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %this1, ptr align 1 %ref.tmp, i32 8, i1 false)
+// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
+// CHECK-NEXT:store i32 0, ptr %First, align 1
 // CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
+// CHECK-NEXT:store i32 0, ptr %Second, align 1
+// CHECK-NEXT:%Second2 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
+// CHECK-NEXT:%0 = load i32, ptr %Second2, align 1
+// CHECK-NEXT:ret i32 %0
 
 // CHECK-LABEL:     define {{.*}}DoSilly
-// CHECK-NEXT:entry:
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ResultPtr:%.*]] = alloca ptr
 // CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr
+// CHECK-NEXT: [[ObjIndirectAddr:%.*]] = alloca ptr
+// CHECK-NEXT: store ptr %agg.result, ptr [[ResultPtr]]
 // CHECK-NEXT: store ptr {{.*}}, ptr [[ThisPtrAddr]]
+// CHECK-NEXT: store ptr %Obj, ptr [[ObjIndirectAddr]]  
 // CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAddr]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[ThisPtr]], ptr align 1 [[Obj:%.*]], i32 8, i1 false)
 // CHECK-NEXT: [[FirstAddr:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
-// CHECK-NEXT: [[First:%.*]] = load i32, ptr [[FirstAddr]]
-// CHECK-NEXT: [[FirstPlusTwo:%.*]] = add nsw i32 [[First]], 2
-// CHECK-NEXT: store i32 [[FirstPlusTwo]], ptr [[FirstAddr]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 {{.*}}, ptr align 1 [[Obj]], i32 8, i1 false)
+// CHECK-NEXT: [[FirstAddr2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Obj, i32 0, i32 0
+// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[FirstAddr2]]
+// CHECK-NEXT: store i32 [[Z]], ptr [[FirstAddr]], align 1
+// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
+// CHECK-NEXT: [[Second3:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Obj, i32 0, i32 1
+// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[Second3]], align 1
+// CHECK-NEXT: store i32 [[Y]], ptr [[Second]], align 1
+// CHECK-NEXT: [[First4:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
+// CHECK-NEXT: [[X:%.*]] = load i32, ptr [[First4]], align 1
+// CHECK-NEXT: [[Add:%.*]] = add nsw i32 [[X]], 2
+// CHECK-NEXT: store i32 [[Add]], ptr [[First4]], align 1
+// CHECK-NEXT: [[First5:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %agg.result, i32 0, i32 0
+// CHECK-NEXT: [[First6:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Obj, i32 0, i32 0
+// CHECK-NEXT: [[W:%.*]] = load i32, ptr [[First6]], align 1
+// CHECK-NEXT: store i32 [[W]], ptr [[First5]], align 1
+// CHECK-NEXT: [[Second7:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %agg.result, i32 0, i32 1
+// CHECK-NEXT: [[Second8:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Obj, i32 0, i32 1
+// CHECK-NEXT: [[V:%.*]] = load i32, ptr [[Second8]], align 1
+// CHECK-NEXT: store i32 [[V]], ptr [[Second7]], align 1
+// CHECK-NEXT: ret void
diff --git a/clang/test/SemaHLSL/GlobalConstructors.hlsl b/clang/test/SemaHLSL/GlobalConstructors.hlsl
index ddd09422d7485..2dc06395099cf 100644
--- a/clang/test/SemaHLSL/GlobalConstructors.hlsl
+++ b/clang/test/SemaHLSL/GlobalConstructors.hlsl
@@ -3,6 +3,7 @@
 int i;
 
 struct Pup {
+  // expected-error@+1 {{HLSL doesn't support constructors or destructors}}
   Pup() {
     i++;
   }
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
index 15a3186931cb6..922c1359383dd 100644
--- a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
@@ -18,7 +18,7 @@ struct R {
 export void cantCast4() {
   int2 A = {1,2};
   R r = R(A);
-  // expected-error@-1 {{no viable conversion from 'int' to 'R}}
+  // expected-error@-1 {{no matching constructor for initialization of 'R'}}
   // expected-error@-2 {{no viable conversion from 'float' to 'R}}
   R r2;
   r2.A = 1;

>From 7394ea03b742f9718218d3da8aae4672cda5e66e Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Wed, 1 Apr 2026 16:38:30 -0700
Subject: [PATCH 3/9] format

---
 clang/include/clang/AST/DeclCXX.h   | 46 ++++++++++++++++-------------
 clang/lib/CodeGen/CGDecl.cpp        |  6 ++--
 clang/lib/CodeGen/CGHLSLRuntime.cpp |  6 ++--
 clang/lib/Sema/SemaCast.cpp         |  3 +-
 clang/lib/Sema/SemaDecl.cpp         |  9 +++---
 clang/lib/Sema/SemaExpr.cpp         | 23 ++++++++++-----
 clang/lib/Sema/SemaExprCXX.cpp      |  2 +-
 clang/lib/Sema/SemaInit.cpp         |  9 ++++--
 clang/lib/Sema/SemaOverload.cpp     | 21 +++++++------
 9 files changed, 71 insertions(+), 54 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 8f2704ae790ca..e8c34e0ca4f56 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -762,10 +762,10 @@ class CXXRecordDecl : public RecordDecl {
   /// Determines whether this class has any user provided special members.
   bool hasUserProvidedSpecialMembers() const {
     return data().UserDeclaredSpecialMembers &
-      (SMF_MoveConstructor | SMF_MoveAssignment | SMF_Destructor |
-       SMF_CopyAssignment | SMF_CopyConstructor) ||
-      data().UserDeclaredConstructor ||
-      data().UserProvidedDefaultConstructor;
+               (SMF_MoveConstructor | SMF_MoveAssignment | SMF_Destructor |
+                SMF_CopyAssignment | SMF_CopyConstructor) ||
+           data().UserDeclaredConstructor ||
+           data().UserProvidedDefaultConstructor;
   }
 
   /// Determine if we need to declare a default constructor for
@@ -774,14 +774,16 @@ class CXXRecordDecl : public RecordDecl {
   /// This value is used for lazy creation of default constructors.
   bool needsImplicitDefaultConstructor() const {
     return ((!data().UserDeclaredConstructor &&
-            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
-            (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
-           // FIXME: Proposed fix to core wording issue: if a class inherits
-           // a default constructor and doesn't explicitly declare one, one
-           // is declared implicitly.
-           (data().HasInheritedDefaultConstructor &&
-            !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
-      (!getLangOpts().HLSL || (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) || hasUserProvidedSpecialMembers());
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
+             (!isLambda() || lambdaIsDefaultConstructibleAndAssignable())) ||
+            // FIXME: Proposed fix to core wording issue: if a class inherits
+            // a default constructor and doesn't explicitly declare one, one
+            // is declared implicitly.
+            (data().HasInheritedDefaultConstructor &&
+             !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
+           (!getLangOpts().HLSL ||
+            (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether this class has any user-declared constructors.
@@ -808,7 +810,8 @@ class CXXRecordDecl : public RecordDecl {
   /// constructor to be lazily declared.
   bool needsImplicitCopyConstructor() const {
     return !(data().DeclaredSpecialMembers & SMF_CopyConstructor) &&
-      (!getLangOpts().HLSL || isLambda() || hasUserProvidedSpecialMembers());
+           (!getLangOpts().HLSL || isLambda() ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -904,10 +907,9 @@ class CXXRecordDecl : public RecordDecl {
     return !(data().DeclaredSpecialMembers & SMF_MoveConstructor) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveAssignment() &&
-           !hasUserDeclaredDestructor() &&
-      (!getLangOpts().HLSL || isLambda() || hasUserDeclaredConstructor()
-       || hasUserProvidedDefaultConstructor());
+           !hasUserDeclaredMoveAssignment() && !hasUserDeclaredDestructor() &&
+           (!getLangOpts().HLSL || isLambda() || hasUserDeclaredConstructor() ||
+            hasUserProvidedDefaultConstructor());
   }
 
   /// Determine whether we need to eagerly declare a defaulted move
@@ -937,7 +939,8 @@ class CXXRecordDecl : public RecordDecl {
   /// assignment operator to be lazily declared.
   bool needsImplicitCopyAssignment() const {
     return !(data().DeclaredSpecialMembers & SMF_CopyAssignment) &&
-      (!getLangOpts().HLSL || isLambda() || hasUserProvidedSpecialMembers());
+           (!getLangOpts().HLSL || isLambda() ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -998,10 +1001,11 @@ class CXXRecordDecl : public RecordDecl {
     return !(data().DeclaredSpecialMembers & SMF_MoveAssignment) &&
            !hasUserDeclaredCopyConstructor() &&
            !hasUserDeclaredCopyAssignment() &&
-           !hasUserDeclaredMoveConstructor() &&
-           !hasUserDeclaredDestructor() &&
+           !hasUserDeclaredMoveConstructor() && !hasUserDeclaredDestructor() &&
            (!isLambda() || lambdaIsDefaultConstructibleAndAssignable()) &&
-      (!getLangOpts().HLSL || (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) || hasUserProvidedSpecialMembers());
+           (!getLangOpts().HLSL ||
+            (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
+            hasUserProvidedSpecialMembers());
   }
 
   /// Determine whether we need to eagerly declare a move assignment
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 49f35f254403e..4483f2dd94f7f 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1527,9 +1527,9 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
     if (D.getInit() && (Ty->isArrayType() || Ty->isRecordType()) &&
         (D.isConstexpr() ||
          ((Ty.isPODType(getContext()) ||
-	   getContext().getBaseElementType(Ty)->isObjCObjectPointerType() ||
-	   getLangOpts().HLSL) &&
-	    D.getInit()->isConstantInitializer(getContext(), false)))) {
+           getContext().getBaseElementType(Ty)->isObjCObjectPointerType() ||
+           getLangOpts().HLSL) &&
+          D.getInit()->isConstantInitializer(getContext(), false)))) {
 
       // If the variable's a const type, and it's neither an NRVO
       // candidate nor a __block variable and has no mutable members,
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 76effd217467d..cfdcd856c4f88 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -977,8 +977,10 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
             PD->getAttr<HLSLParamModifierAttr>()) {
       llvm_unreachable("Not handled yet");
     } else {
-      llvm::Type *ParamType =
-	Param.hasByValAttr() ? Param.getParamByValType() : PD->getType()->isRecordType() ? CGM.getTypes().ConvertType(PD->getType()) : Param.getType();
+      llvm::Type *ParamType = Param.hasByValAttr() ? Param.getParamByValType()
+                              : PD->getType()->isRecordType()
+                                  ? CGM.getTypes().ConvertType(PD->getType())
+                                  : Param.getType();
       auto AttrBegin = PD->specific_attr_begin<HLSLAppliedSemanticAttr>();
       auto AttrEnd = PD->specific_attr_end<HLSLAppliedSemanticAttr>();
       auto Result =
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 09c2e36512265..9d7c75ca1dbb5 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -1943,7 +1943,8 @@ TryCastResult TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr,
   bool CStyle = (CCK == CheckedConversionKind::CStyleCast ||
                  CCK == CheckedConversionKind::FunctionalCast);
   if (InitSeq.Failed() && (CStyle || !DestType->isReferenceType()) &&
-      InitSeq.getFailureKind() != InitializationSequence::FK_HLSLInitListFlatteningFailed)
+      InitSeq.getFailureKind() !=
+          InitializationSequence::FK_HLSLInitListFlatteningFailed)
     return TC_NotApplicable;
 
   ExprResult Result = InitSeq.Perform(Self, Entity, InitKind, SrcExprRaw);
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index bf603901a5150..b7bdd30a2d247 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14755,9 +14755,10 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
     // In HLSL don't default initialize user defined structs.
     // Must have failed because there was no valid defined default constructor
     if (InitSeq.Failed() && getLangOpts().HLSL &&
-	InitSeq.getFailureKind() == InitializationSequence::FK_ConstructorOverloadFailed)
-     return;
-    
+        InitSeq.getFailureKind() ==
+            InitializationSequence::FK_ConstructorOverloadFailed)
+      return;
+
     ExprResult Init = InitSeq.Perform(*this, Entity, Kind, {});
 
     if (Init.get()) {
@@ -14765,7 +14766,7 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
       // This is important for template substitution.
       Var->setInitStyle(VarDecl::CallInit);
     } else if (Init.isInvalid()) {
-      
+
       // If default-init fails, attach a recovery-expr initializer to track
       // that initialization was attempted and failed.
       auto RecoveryExpr =
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5124f64d3e245..6d5a293023f75 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10301,7 +10301,9 @@ AssignConvertType Sema::CheckSingleAssignmentConstraints(QualType LHSType,
       return AssignConvertType::Compatible;
     }
 
-    if (ConvertRHS) // && (!getLangOpts().HLSL || Context.getCanonicalType(RHS.get()->getType()) != Context.getCanonicalType(LHSType)))
+    if (ConvertRHS) // && (!getLangOpts().HLSL ||
+                    // Context.getCanonicalType(RHS.get()->getType()) !=
+                    // Context.getCanonicalType(LHSType)))
       RHS = ImpCastExprToType(E, Ty, Kind);
   }
 
@@ -15447,9 +15449,10 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
     if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
       InitListExpr *ILE = cast<InitListExpr>(RHSExpr);
       if (!HLSL().transformInitList(Entity, ILE))
-	InitSeq.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed);
+        InitSeq.SetFailed(
+            InitializationSequence::FK_HLSLInitListFlatteningFailed);
       else
-	RHSExpr = ILE;
+        RHSExpr = ILE;
     }
 
     ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr);
@@ -15503,19 +15506,23 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
     // If this is HLSL try to perform aggregate initialization.
     if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
       ResultTy = LHSExpr->getType();
-      InitListExpr *ILE = new (Context) InitListExpr(getASTContext(), RHSExpr->getBeginLoc(), {RHSExpr}, RHSExpr->getEndLoc());
+      InitListExpr *ILE =
+          new (Context) InitListExpr(getASTContext(), RHSExpr->getBeginLoc(),
+                                     {RHSExpr}, RHSExpr->getEndLoc());
       ILE->setType(getASTContext().VoidTy);
-      InitializationKind Kind = InitializationKind::CreateDirectList(RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
+      InitializationKind Kind = InitializationKind::CreateDirectList(
+          RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
       InitializedEntity Entity =
-	InitializedEntity::InitializeTemporary(ResultTy);
+          InitializedEntity::InitializeTemporary(ResultTy);
       RHSExpr = ILE;
       InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
       if (!HLSL().transformInitList(Entity, ILE))
-	InitSeq.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed);
+        InitSeq.SetFailed(
+            InitializationSequence::FK_HLSLInitListFlatteningFailed);
 
       ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr);
       if (Init.isInvalid())
-	return Init;
+        return Init;
       RHS = Init.get();
       break;
     }
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 1950bd4ab4a0c..0930389ac5efb 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1605,7 +1605,7 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
   // HLSL doesn't support constructors or c++ functional cast for structs
   if (getLangOpts().HLSL && Ty->isRecordType())
     return ExprError(Diag(TyBeginLoc, diag::err_ovl_no_viable_function_in_init)
-		     << Ty << FullRange);
+                     << Ty << FullRange);
 
   // C++ [expr.type.conv]p1:
   // If the expression list is a parenthesized single expression, the type
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index c0ebbad0c0232..fafd9273d33b2 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -4839,12 +4839,15 @@ static void TryConstructorOrParenListInitialization(
 
   // Try list initialization if this is hlsl
   if (S.getLangOpts().HLSL && Sequence.Failed()) {
-    InitListExpr *ILE = new (S.Context) InitListExpr(S.getASTContext(), Args.front()->getBeginLoc(), Args, Args.back()->getEndLoc());
+    InitListExpr *ILE = new (S.Context)
+        InitListExpr(S.getASTContext(), Args.front()->getBeginLoc(), Args,
+                     Args.back()->getEndLoc());
     ILE->setType(S.getASTContext().VoidTy);
     Args[0] = ILE;
     // reset sequence as normal
     Sequence.setSequenceKind(InitializationSequence::NormalSequence);
-    TryListInitialization(S, Entity, Kind, ILE, Sequence, /*TreatUnavailableAsInvalid=*/true);
+    TryListInitialization(S, Entity, Kind, ILE, Sequence,
+                          /*TreatUnavailableAsInvalid=*/true);
     return;
   }
   //       * Otherwise, if no constructor is viable, the destination type
@@ -5980,7 +5983,7 @@ static void TryOrBuildParenListInitialization(
         return false;
 
       if (InitExpr)
-	*InitExpr = ER.get();
+        *InitExpr = ER.get();
       else
         InitExprs.push_back(ER.get());
     }
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index d2c73d44bb1b1..44a01f0950aaf 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -15810,7 +15810,7 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
                                                    Args, OpLoc);
       DeferDiagsRAII DDR(*this,
                          CandidateSet.shouldDeferDiags(*this, Args, OpLoc));
-      if (Args[0]->getType()->isRecordType() && !getLangOpts().HLSL && 
+      if (Args[0]->getType()->isRecordType() && !getLangOpts().HLSL &&
           Opc >= BO_Assign && Opc <= BO_OrAssign) {
         Diag(OpLoc,  diag::err_ovl_no_viable_oper)
              << BinaryOperator::getOpcodeStr(Opc)
@@ -15821,20 +15821,20 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
             << Args[0]->getSourceRange() << Args[1]->getSourceRange();
         }
       } else if (getLangOpts().HLSL) {
-	// If this is HLSL fall back to builtin operation
+        // If this is HLSL fall back to builtin operation
 
-	// This is an erroneous use of an operator which can be overloaded by
+        // This is an erroneous use of an operator which can be overloaded by
         // a non-member function. Check for non-member operators which were
         // defined too late to be candidates.
         if (DiagnoseTwoPhaseOperatorLookup(*this, Op, OpLoc, Args))
           // FIXME: Recover by calling the found function.
           return ExprError();
 
-	Result = CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
+        Result = CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
+
+        if (!Result.isInvalid())
+          return Result;
 
-	if (!Result.isInvalid())
-	  return Result;
-	
       } else {
         // This is an erroneous use of an operator which can be overloaded by
         // a non-member function. Check for non-member operators which were
@@ -15847,10 +15847,9 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
         // produce an error. Then, show the non-viable candidates.
         Result = CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
 
-	// If this was HLSL it might not have produced an error.
-	if (getLangOpts().HLSL && !Result.isInvalid())
-	  return Result;
-	  
+        // If this was HLSL it might not have produced an error.
+        if (getLangOpts().HLSL && !Result.isInvalid())
+          return Result;
       }
       assert(Result.isInvalid() &&
              "C++ binary operator overloading is missing candidates!");

>From 3009f093daab2f7318e536d7c3b7e0d916a6ec28 Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Thu, 2 Apr 2026 10:28:44 -0700
Subject: [PATCH 4/9] self review; mostly adding comments + removing some
 unnecessary changes

---
 clang/include/clang/AST/DeclCXX.h   |  2 ++
 clang/lib/CodeGen/CGDecl.cpp        |  2 ++
 clang/lib/CodeGen/CGHLSLRuntime.cpp |  2 ++
 clang/lib/Sema/SemaCast.cpp         |  4 +++-
 clang/lib/Sema/SemaDecl.cpp         |  1 -
 clang/lib/Sema/SemaDeclCXX.cpp      |  2 +-
 clang/lib/Sema/SemaExpr.cpp         |  7 +++----
 clang/lib/Sema/SemaLookup.cpp       |  2 --
 clang/lib/Sema/SemaOverload.cpp     | 17 +----------------
 9 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index e8c34e0ca4f56..65d437d201572 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -759,6 +759,8 @@ class CXXRecordDecl : public RecordDecl {
            needsImplicitDefaultConstructor();
   }
 
+  // Used by HLSL to determine if implicit constructors and operators should
+  // be allowed for structs. This is required for HLSL's resource classes.
   /// Determines whether this class has any user provided special members.
   bool hasUserProvidedSpecialMembers() const {
     return data().UserDeclaredSpecialMembers &
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 4483f2dd94f7f..353c2a891cdef 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1528,6 +1528,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
         (D.isConstexpr() ||
          ((Ty.isPODType(getContext()) ||
            getContext().getBaseElementType(Ty)->isObjCObjectPointerType() ||
+           // check if it is a constant initializer if HLSL because PODness
+           // will no longer be true for any user defined structs
            getLangOpts().HLSL) &&
           D.getInit()->isConstantInitializer(getContext(), false)))) {
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index cfdcd856c4f88..4520f5f587026 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -988,6 +988,8 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
       SemanticValue = Result.first;
       if (!SemanticValue)
         return;
+      // if this is a ptr to a record and it does'nt have byval attr
+      // we still need the record type not just 'ptr'
       if (Param.hasByValAttr() || PD->getType()->isRecordType()) {
         llvm::Value *Var = B.CreateAlloca(ParamType);
         B.CreateStore(SemanticValue, Var);
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 9d7c75ca1dbb5..0f39f6a21e169 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -1939,7 +1939,9 @@ TryCastResult TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr,
   // There is no other way that works.
   // On the other hand, if we're checking a C-style cast, we've still got
   // the reinterpret_cast way.
-  // If an HLSLInitListFlattening failed then there is no fallback.
+  // If an HLSLInitListFlattening failed then there is no fallback; this
+  // check helps prevent double errors being produced and transformInitList
+  // being run a 2nd time during diagnoses.
   bool CStyle = (CCK == CheckedConversionKind::CStyleCast ||
                  CCK == CheckedConversionKind::FunctionalCast);
   if (InitSeq.Failed() && (CStyle || !DestType->isReferenceType()) &&
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index b7bdd30a2d247..bc205ea525134 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14766,7 +14766,6 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
       // This is important for template substitution.
       Var->setInitStyle(VarDecl::CallInit);
     } else if (Init.isInvalid()) {
-
       // If default-init fails, attach a recovery-expr initializer to track
       // that initialization was attempted and failed.
       auto RecoveryExpr =
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index d3f2eb578a227..cc93b7cf054d3 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -3507,7 +3507,7 @@ Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D,
     }
   }
 
-  // HLSL prohibits constructors and destructors.
+  // HLSL prohibits user defined constructors and destructors.
   if (getLangOpts().HLSL) {
     switch (Name.getNameKind()) {
     case DeclarationName::CXXConstructorName:
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 6d5a293023f75..ca5310bd5e5b8 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10301,9 +10301,7 @@ AssignConvertType Sema::CheckSingleAssignmentConstraints(QualType LHSType,
       return AssignConvertType::Compatible;
     }
 
-    if (ConvertRHS) // && (!getLangOpts().HLSL ||
-                    // Context.getCanonicalType(RHS.get()->getType()) !=
-                    // Context.getCanonicalType(LHSType)))
+    if (ConvertRHS)
       RHS = ImpCastExprToType(E, Ty, Kind);
   }
 
@@ -15503,7 +15501,8 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
 
   switch (Opc) {
   case BO_Assign:
-    // If this is HLSL try to perform aggregate initialization.
+    // If this is HLSL and the LHS is a record try to perform aggregate
+    // initialization.
     if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
       ResultTy = LHSExpr->getType();
       InitListExpr *ILE =
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 68a780382c0b0..de53f6010a1b6 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -3632,8 +3632,6 @@ CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class,
 
 DeclContext::lookup_result Sema::LookupConstructors(CXXRecordDecl *Class) {
   // If the implicit constructors have not yet been declared, do so now.
-  // HLSL doesn't support implicit constructors because constructors for
-  // user defined classes are not supported
   if (CanDeclareSpecialMemberFunction(Class)) {
     runWithSufficientStackSpace(Class->getLocation(), [&] {
       if (Class->needsImplicitDefaultConstructor())
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 44a01f0950aaf..52e6fa5605cc5 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -15820,21 +15820,6 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
             << Args[0]->getType()
             << Args[0]->getSourceRange() << Args[1]->getSourceRange();
         }
-      } else if (getLangOpts().HLSL) {
-        // If this is HLSL fall back to builtin operation
-
-        // This is an erroneous use of an operator which can be overloaded by
-        // a non-member function. Check for non-member operators which were
-        // defined too late to be candidates.
-        if (DiagnoseTwoPhaseOperatorLookup(*this, Op, OpLoc, Args))
-          // FIXME: Recover by calling the found function.
-          return ExprError();
-
-        Result = CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
-
-        if (!Result.isInvalid())
-          return Result;
-
       } else {
         // This is an erroneous use of an operator which can be overloaded by
         // a non-member function. Check for non-member operators which were
@@ -15847,7 +15832,7 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
         // produce an error. Then, show the non-viable candidates.
         Result = CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]);
 
-        // If this was HLSL it might not have produced an error.
+        // If this was HLSL it might not have produced an error which is ok.
         if (getLangOpts().HLSL && !Result.isInvalid())
           return Result;
       }

>From a239f0a3ebb2aa0e6909024c35aa5655369c06f5 Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Mon, 6 Apr 2026 21:41:45 -0700
Subject: [PATCH 5/9] update error messages

---
 clang/include/clang/Basic/DiagnosticSemaKinds.td       |  1 +
 clang/lib/Sema/SemaCast.cpp                            | 10 ++++++----
 clang/lib/Sema/SemaExprCXX.cpp                         |  4 ++--
 clang/lib/Sema/SemaInit.cpp                            |  2 ++
 .../SemaHLSL/Language/AggregateSplatCast-errors.hlsl   |  6 +++---
 .../test/SemaHLSL/Language/ElementwiseCast-errors.hlsl |  5 ++---
 clang/test/SemaHLSL/prohibit_pointer.hlsl              |  2 +-
 7 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4e2f4a8dafb70..dfbe4220e8797 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13534,6 +13534,7 @@ def err_hlsl_builtin_resource_coordinate_dimension_mismatch : Error<
 
 // HLSL Diagnostics
 def err_hlsl_cstor_dstor : Error<"HLSL doesn't support constructors or destructors">;
+def err_hlsl_constructors_functional_cast : Error<"HLSL doesn't support constructors or functional-style casts">;
 def err_hlsl_langstd_unimplemented : Error<"support for HLSL language version %0 is incomplete">;
 def err_hlsl_attr_unsupported_in_stage : Error<"attribute %0 is unsupported in '%1' shaders, requires %select{|one of the following: }2%3">;
 def err_hlsl_attr_invalid_type : Error<
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 0f39f6a21e169..4e814eccf9e43 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -520,6 +520,8 @@ static bool tryDiagnoseOverloadedCast(Sema &S, CastType CT,
     break;
   case InitializationSequence::FK_ConstructorOverloadFailed:
   case InitializationSequence::FK_UserConversionOverloadFailed:
+    // HLSL list initialization must have failed as a constructor replacement
+  case InitializationSequence::FK_HLSLInitListFlatteningFailed:
     break;
   }
 
@@ -531,7 +533,9 @@ static bool tryDiagnoseOverloadedCast(Sema &S, CastType CT,
   switch (sequence.getFailedOverloadResult()) {
   case OR_Success: llvm_unreachable("successful failed overload");
   case OR_No_Viable_Function:
-    if (candidates.empty())
+    // hlsl doesn't currently support conversion operators, so
+    // produce the other diagnostic.
+    if (candidates.empty() && !S.getLangOpts().HLSL)
       msg = diag::err_ovl_no_conversion_in_cast;
     else
       msg = diag::err_ovl_no_viable_conversion_in_cast;
@@ -1944,9 +1948,7 @@ TryCastResult TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr,
   // being run a 2nd time during diagnoses.
   bool CStyle = (CCK == CheckedConversionKind::CStyleCast ||
                  CCK == CheckedConversionKind::FunctionalCast);
-  if (InitSeq.Failed() && (CStyle || !DestType->isReferenceType()) &&
-      InitSeq.getFailureKind() !=
-          InitializationSequence::FK_HLSLInitListFlatteningFailed)
+  if (InitSeq.Failed() && (CStyle || !DestType->isReferenceType()))
     return TC_NotApplicable;
 
   ExprResult Result = InitSeq.Perform(Self, Entity, InitKind, SrcExprRaw);
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 0930389ac5efb..1c325f1f942d2 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1604,8 +1604,8 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
 
   // HLSL doesn't support constructors or c++ functional cast for structs
   if (getLangOpts().HLSL && Ty->isRecordType())
-    return ExprError(Diag(TyBeginLoc, diag::err_ovl_no_viable_function_in_init)
-                     << Ty << FullRange);
+    return ExprError(
+        Diag(TyBeginLoc, diag::err_hlsl_constructors_functional_cast));
 
   // C++ [expr.type.conv]p1:
   // If the expression list is a parenthesized single expression, the type
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index fafd9273d33b2..fbb4e4117236e 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -4846,6 +4846,8 @@ static void TryConstructorOrParenListInitialization(
     Args[0] = ILE;
     // reset sequence as normal
     Sequence.setSequenceKind(InitializationSequence::NormalSequence);
+    // don't want the diagnostics to appear if list initialization fails.
+    Sema::TentativeAnalysisScope DisableDiag(S);
     TryListInitialization(S, Entity, Kind, ILE, Sequence,
                           /*TreatUnavailableAsInvalid=*/true);
     return;
diff --git a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
index 8b5b763f38788..fbb47bd2e7d39 100644
--- a/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/AggregateSplatCast-errors.hlsl
@@ -16,7 +16,7 @@ struct R {
 // Can't cast a union
 export void cantCast2() {
   R r = (R)1;
-  // expected-error at -1 {{too few initializers in list for type 'R' (expected 2 but found 1)}}
+  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'R'}}
 }
 
 RWBuffer<float4> Buf;
@@ -24,7 +24,7 @@ RWBuffer<float4> Buf;
 // Can't cast an intangible type
 export void cantCast3() {
   Buf = (RWBuffer<float4>)1;
-  // expected-error at -1 {{no viable conversion from 'int' to 'hlsl::RWBuffer<vector<float, 4>>'}}
+  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'RWBuffer<float4>' (aka 'RWBuffer<vector<float, 4>>')}}
 }
 
 export void cantCast4() {
@@ -39,5 +39,5 @@ struct X {
 
 export void cantCast5() {
   X x = (X)1;
-  // expected-error at -1 {{too few initializers in list for type 'X' (expected 2 but found 1)}}
+  // expected-error at -1 {{no matching conversion for C-style cast from 'int' to 'X'}}
 }
diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
index 922c1359383dd..94275b7fc16ef 100644
--- a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
+++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl
@@ -18,11 +18,10 @@ struct R {
 export void cantCast4() {
   int2 A = {1,2};
   R r = R(A);
-  // expected-error at -1 {{no matching constructor for initialization of 'R'}}
-  // expected-error at -2 {{no viable conversion from 'float' to 'R}}
+  // expected-error at -1 {{HLSL doesn't support constructors or functional-style casts}}
   R r2;
   r2.A = 1;
   r2.F = 2.0;
   int2 B = (int2)r2;
-  // expected-error at -1 {{cannot convert 'R' to 'int2' (aka 'vector<int, 2>') without a conversion operator}}
+  // expected-error at -1 {{no matching conversion for C-style cast from 'R' to 'int2' (aka 'vector<int, 2>')}}
 }
diff --git a/clang/test/SemaHLSL/prohibit_pointer.hlsl b/clang/test/SemaHLSL/prohibit_pointer.hlsl
index 90f1160e1d593..cfc6b21834a6d 100644
--- a/clang/test/SemaHLSL/prohibit_pointer.hlsl
+++ b/clang/test/SemaHLSL/prohibit_pointer.hlsl
@@ -68,7 +68,7 @@ struct Fish {
 
   // expected-note at +1 {{'->' applied to return value of the operator->() declared here}}
   Fins operator ->() {
-    // expected-error at +1 {{no matching constructor for initialization of 'Fins'}}
+    // expected-error at +1 {{HLSL doesn't support constructors or functional-style casts}}
     return Fins();
   }
 };

>From a76e83385299ddfdf88bdaa21d562a02445d691f Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Fri, 10 Apr 2026 15:57:12 -0700
Subject: [PATCH 6/9] respond to pr comments

---
 clang/include/clang/AST/DeclCXX.h             | 21 ++++---------------
 clang/include/clang/Sema/SemaHLSL.h           |  3 +++
 clang/lib/CodeGen/CGDecl.cpp                  |  4 ++--
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  4 ++--
 clang/lib/Sema/SemaCast.cpp                   |  7 ++-----
 clang/lib/Sema/SemaExpr.cpp                   | 19 +++--------------
 clang/lib/Sema/SemaExprCXX.cpp                |  2 +-
 clang/lib/Sema/SemaHLSL.cpp                   | 17 +++++++++++++++
 clang/lib/Sema/SemaInit.cpp                   |  6 +++---
 .../CodeGenHLSL/this-assignment-overload.hlsl | 19 +++++++++++------
 10 files changed, 50 insertions(+), 52 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 65d437d201572..5797edcb72943 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -759,17 +759,6 @@ class CXXRecordDecl : public RecordDecl {
            needsImplicitDefaultConstructor();
   }
 
-  // Used by HLSL to determine if implicit constructors and operators should
-  // be allowed for structs. This is required for HLSL's resource classes.
-  /// Determines whether this class has any user provided special members.
-  bool hasUserProvidedSpecialMembers() const {
-    return data().UserDeclaredSpecialMembers &
-               (SMF_MoveConstructor | SMF_MoveAssignment | SMF_Destructor |
-                SMF_CopyAssignment | SMF_CopyConstructor) ||
-           data().UserDeclaredConstructor ||
-           data().UserProvidedDefaultConstructor;
-  }
-
   /// Determine if we need to declare a default constructor for
   /// this class.
   ///
@@ -785,7 +774,7 @@ class CXXRecordDecl : public RecordDecl {
              !(data().DeclaredSpecialMembers & SMF_DefaultConstructor))) &&
            (!getLangOpts().HLSL ||
             (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
-            hasUserProvidedSpecialMembers());
+            isImplicit());
   }
 
   /// Determine whether this class has any user-declared constructors.
@@ -812,8 +801,7 @@ class CXXRecordDecl : public RecordDecl {
   /// constructor to be lazily declared.
   bool needsImplicitCopyConstructor() const {
     return !(data().DeclaredSpecialMembers & SMF_CopyConstructor) &&
-           (!getLangOpts().HLSL || isLambda() ||
-            hasUserProvidedSpecialMembers());
+           (!getLangOpts().HLSL || isLambda() || isImplicit());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -941,8 +929,7 @@ class CXXRecordDecl : public RecordDecl {
   /// assignment operator to be lazily declared.
   bool needsImplicitCopyAssignment() const {
     return !(data().DeclaredSpecialMembers & SMF_CopyAssignment) &&
-           (!getLangOpts().HLSL || isLambda() ||
-            hasUserProvidedSpecialMembers());
+           (!getLangOpts().HLSL || isLambda() || isImplicit());
   }
 
   /// Determine whether we need to eagerly declare a defaulted copy
@@ -1007,7 +994,7 @@ class CXXRecordDecl : public RecordDecl {
            (!isLambda() || lambdaIsDefaultConstructibleAndAssignable()) &&
            (!getLangOpts().HLSL ||
             (isLambda() && lambdaIsDefaultConstructibleAndAssignable()) ||
-            hasUserProvidedSpecialMembers());
+            isImplicit());
   }
 
   /// Determine whether we need to eagerly declare a move assignment
diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index 6f62fbc635d9c..c62f85196276d 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -233,6 +233,9 @@ class SemaHLSL : public SemaBase {
   bool initGlobalResourceDecl(VarDecl *VD);
   bool initGlobalResourceArrayDecl(VarDecl *VD);
 
+  ExprResult tryAggregateInitialization(Sema &S, QualType DestTy,
+                                        Expr *RHSExpr);
+
 private:
   // HLSL resource type attributes need to be processed all at once.
   // This is a list to collect them.
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 353c2a891cdef..0edb6a899647b 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1528,8 +1528,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
         (D.isConstexpr() ||
          ((Ty.isPODType(getContext()) ||
            getContext().getBaseElementType(Ty)->isObjCObjectPointerType() ||
-           // check if it is a constant initializer if HLSL because PODness
-           // will no longer be true for any user defined structs
+           // If HLSL, then check if it is a constant initializer because
+           // PODness will no longer be true for any user defined structs.
            getLangOpts().HLSL) &&
           D.getInit()->isConstantInitializer(getContext(), false)))) {
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 4520f5f587026..bc9ea617519e5 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -988,8 +988,8 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
       SemanticValue = Result.first;
       if (!SemanticValue)
         return;
-      // if this is a ptr to a record and it does'nt have byval attr
-      // we still need the record type not just 'ptr'
+      // If this is a 'ptr' to a record and it doesn't have byval attribute,
+      // we still need the record type, not just 'ptr'.
       if (Param.hasByValAttr() || PD->getType()->isRecordType()) {
         llvm::Value *Var = B.CreateAlloca(ParamType);
         B.CreateStore(SemanticValue, Var);
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 4e814eccf9e43..ecdb76f45fa67 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -520,7 +520,7 @@ static bool tryDiagnoseOverloadedCast(Sema &S, CastType CT,
     break;
   case InitializationSequence::FK_ConstructorOverloadFailed:
   case InitializationSequence::FK_UserConversionOverloadFailed:
-    // HLSL list initialization must have failed as a constructor replacement
+    // HLSL list initialization must have failed as a constructor replacement.
   case InitializationSequence::FK_HLSLInitListFlatteningFailed:
     break;
   }
@@ -533,7 +533,7 @@ static bool tryDiagnoseOverloadedCast(Sema &S, CastType CT,
   switch (sequence.getFailedOverloadResult()) {
   case OR_Success: llvm_unreachable("successful failed overload");
   case OR_No_Viable_Function:
-    // hlsl doesn't currently support conversion operators, so
+    // HLSL doesn't currently support conversion operators, so
     // produce the other diagnostic.
     if (candidates.empty() && !S.getLangOpts().HLSL)
       msg = diag::err_ovl_no_conversion_in_cast;
@@ -1943,9 +1943,6 @@ TryCastResult TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr,
   // There is no other way that works.
   // On the other hand, if we're checking a C-style cast, we've still got
   // the reinterpret_cast way.
-  // If an HLSLInitListFlattening failed then there is no fallback; this
-  // check helps prevent double errors being produced and transformInitList
-  // being run a 2nd time during diagnoses.
   bool CStyle = (CCK == CheckedConversionKind::CStyleCast ||
                  CCK == CheckedConversionKind::FunctionalCast);
   if (InitSeq.Failed() && (CStyle || !DestType->isReferenceType()))
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index ca5310bd5e5b8..4df64c0f887e1 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -15443,7 +15443,7 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
         InitializedEntity::InitializeTemporary(LHSExpr->getType());
     InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
 
-    // If this is HLSL and LHS is a record we transform the init list
+    // If this is HLSL and LHS is a record we transform the init list.
     if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
       InitListExpr *ILE = cast<InitListExpr>(RHSExpr);
       if (!HLSL().transformInitList(Entity, ILE))
@@ -15505,21 +15505,8 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
     // initialization.
     if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
       ResultTy = LHSExpr->getType();
-      InitListExpr *ILE =
-          new (Context) InitListExpr(getASTContext(), RHSExpr->getBeginLoc(),
-                                     {RHSExpr}, RHSExpr->getEndLoc());
-      ILE->setType(getASTContext().VoidTy);
-      InitializationKind Kind = InitializationKind::CreateDirectList(
-          RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
-      InitializedEntity Entity =
-          InitializedEntity::InitializeTemporary(ResultTy);
-      RHSExpr = ILE;
-      InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
-      if (!HLSL().transformInitList(Entity, ILE))
-        InitSeq.SetFailed(
-            InitializationSequence::FK_HLSLInitListFlatteningFailed);
-
-      ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr);
+      ExprResult Init =
+          HLSL().tryAggregateInitialization(*this, ResultTy, RHSExpr);
       if (Init.isInvalid())
         return Init;
       RHS = Init.get();
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 1c325f1f942d2..587ab845178d4 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1602,7 +1602,7 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
         Context, Ty.getNonReferenceType(), TInfo, LParenOrBraceLoc, Exprs,
         RParenOrBraceLoc, ListInitialization);
 
-  // HLSL doesn't support constructors or c++ functional cast for structs
+  // HLSL doesn't support constructors or c++ functional casts for structs.
   if (getLangOpts().HLSL && Ty->isRecordType())
     return ExprError(
         Diag(TyBeginLoc, diag::err_hlsl_constructors_functional_cast));
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 892ec9ee6381a..370c636effdde 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -6202,3 +6202,20 @@ QualType SemaHLSL::ActOnTemplateShorthand(TemplateDecl *Template,
       ElaboratedTypeKeyword::None, TemplateName(Template), NameLoc,
       TemplateArgs, nullptr, /*ForNestedNameSpecifier=*/false);
 }
+
+ExprResult SemaHLSL::tryAggregateInitialization(Sema &S, QualType DestTy,
+                                                Expr *RHSExpr) {
+  InitListExpr *ILE =
+      new (S.Context) InitListExpr(S.getASTContext(), RHSExpr->getBeginLoc(),
+                                   {RHSExpr}, RHSExpr->getEndLoc());
+  ILE->setType(S.getASTContext().VoidTy);
+  InitializationKind Kind = InitializationKind::CreateDirectList(
+      RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
+  InitializedEntity Entity = InitializedEntity::InitializeTemporary(DestTy);
+  RHSExpr = ILE;
+  InitializationSequence InitSeq(S, Entity, Kind, RHSExpr);
+  if (!transformInitList(Entity, ILE))
+    InitSeq.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed);
+
+  return InitSeq.Perform(S, Entity, Kind, RHSExpr);
+}
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index fbb4e4117236e..9c97dd3d65ebd 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -4837,16 +4837,16 @@ static void TryConstructorOrParenListInitialization(
   TryConstructorInitialization(S, Entity, Kind, Args, DestType, DestType,
                                Sequence, /*IsListInit=*/false, IsAggrListInit);
 
-  // Try list initialization if this is hlsl
+  // Try list initialization if this is HLSL.
   if (S.getLangOpts().HLSL && Sequence.Failed()) {
     InitListExpr *ILE = new (S.Context)
         InitListExpr(S.getASTContext(), Args.front()->getBeginLoc(), Args,
                      Args.back()->getEndLoc());
     ILE->setType(S.getASTContext().VoidTy);
     Args[0] = ILE;
-    // reset sequence as normal
+    // Reset the sequence as normal.
     Sequence.setSequenceKind(InitializationSequence::NormalSequence);
-    // don't want the diagnostics to appear if list initialization fails.
+    // We don't want the diagnostics to appear if list initialization fails.
     Sema::TentativeAnalysisScope DisableDiag(S);
     TryListInitialization(S, Entity, Kind, ILE, Sequence,
                           /*TreatUnavailableAsInvalid=*/true);
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index b7babc99ecc7f..7c1c372bccef6 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -33,11 +33,18 @@ void main() {
 // CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
 // CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
 // CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %Another, ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
-// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %agg.tmp, ptr align 1 %Another, i32 8, i1 false)
-// CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 1 %agg.tmp)
-// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
-// CHECK-NEXT:%0 = load i32, ptr %First, align 1
-// CHECK-NEXT:ret i32 %0
+// CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %agg.tmp, i32 0, i32 0
+// CHECK-NEXT:%First2 = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 0
+// CHECK-NEXT:%0 = load i32, ptr %First2, align 1
+// CHECK-NEXT:store i32 %0, ptr %First, align 1
+// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %agg.tmp, i32 0, i32 1
+// CHECK-NEXT:%Second3 = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 1
+// CHECK-NEXT:%1 = load i32, ptr %Second3, align 1
+// CHECK-NEXT:store i32 %1, ptr %Second, align 1
+// CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef dead_on_return %agg.tmp)
+// CHECK-NEXT:%First4 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
+// CHECK-NEXT:%2 = load i32, ptr %First4, align 1
+// CHECK-NEXT:ret i32 %2
 
 // CHECK:     define linkonce_odr hidden noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 1 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
@@ -50,7 +57,7 @@ void main() {
 // CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %agg.tmp, i32 0, i32 1
 // CHECK-NEXT:store i32 0, ptr %Second, align 1
 
-// CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 1 %agg.tmp)
+// CHECK-NEXT:call void @_ZN4PairaSES_(ptr noundef nonnull align 1 dereferenceable(8) %this1, ptr noundef dead_on_return %agg.tmp)
 // CHECK-NEXT:%Second2 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
 // CHECK-NEXT:%0 = load i32, ptr %Second2, align 1
 // CHECK-NEXT:ret i32 %0

>From 5edc60faedc7ddfef15ae52fb46c4382259bb51b Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Fri, 10 Apr 2026 16:29:58 -0700
Subject: [PATCH 7/9] update global array test

---
 .../CodeGenHLSL/convergence/global_array.hlsl | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/convergence/global_array.hlsl

diff --git a/clang/test/CodeGenHLSL/convergence/global_array.hlsl b/clang/test/CodeGenHLSL/convergence/global_array.hlsl
new file mode 100644
index 0000000000000..a9f5dde3fcb8c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/convergence/global_array.hlsl
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// CHECK: define internal spir_func void @__cxx_global_var_init()
+// CHECK: [[entry_token:%.*]] = call token @llvm.experimental.convergence.entry()
+// CHECK: br label %[[loop_entry:.*]]
+
+// CHECK: [[loop_entry]]:
+// CHECK: [[loop_token:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[entry_token]]) ]
+// CHECK: call void {{.*}} [ "convergencectrl"(token [[loop_token]]) ]
+// CHECK: br i1 {{%.*}} label {{%.*}} label %[[loop_entry]]
+
+RWBuffer<float> b[2];
+static RWBuffer<float> s[2];
+
+[numthreads(1,1,1)]
+void main() {
+  s = b;
+  s[0][0] = 1.0;
+}

>From e2013fb0d5e29b19f6a3c609697ff9a18945a70c Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Tue, 14 Apr 2026 17:04:59 -0700
Subject: [PATCH 8/9] test i forgot to add

---
 clang/test/SemaHLSL/Constructors.hlsl | 79 +++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 clang/test/SemaHLSL/Constructors.hlsl

diff --git a/clang/test/SemaHLSL/Constructors.hlsl b/clang/test/SemaHLSL/Constructors.hlsl
new file mode 100644
index 0000000000000..5b6c27e79e685
--- /dev/null
+++ b/clang/test/SemaHLSL/Constructors.hlsl
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fsyntax-only %s -verify
+
+struct Pair {
+  int First;
+  int Second;
+
+  //expected-error@+1 {{HLSL doesn't support constructors or destructors}}
+  Pair() {
+    First = 0;
+    Second = 0;
+  }
+
+  //expected-error@+1 {{HLSL doesn't support constructors or destructors}}
+  Pair(int F, int S) {
+    First = F;
+    Second = S;
+  }
+
+  //expected-error@+2 {{references are unsupported in HLSL}}
+  //expected-error@+1 {{HLSL doesn't support constructors or destructors}}
+  Pair(const Pair& P) {
+    this.First = P.First;
+    this.Second = P.Second;
+  }
+
+  //expected-error@+2 {{references are unsupported in HLSL}}
+  //expected-error@+1 {{HLSL doesn't support constructors or destructors}}
+  Pair(Pair&& P) = default;
+
+  //expected-error@+1 {{HLSL doesn't support constructors or destructors}}
+  ~Pair();
+
+  //expected-error@+2 {{references are unsupported in HLSL}}
+  //expected-error@+1 {{references are unsupported in HLSL}}
+  Pair& operator=(const Pair& P)
+  {
+    this.First = P.First;
+    this.Second = P.Second;
+    //expected-error@+1 {{the '*' operator is unsupported in HLSL}}
+    return *this;
+  }
+
+  //expected-error@+2 {{references are unsupported in HLSL}}
+  //expected-error@+1 {{references are unsupported in HLSL}}
+  Pair& operator=(Pair&& P)
+  {
+    First = move(P.First);
+    Second = move(P.Second);
+    //expected-error@+1 {{the '*' operator is unsupported in HLSL}}
+    return *this;
+  }
+};
+
+struct Single {
+  int One;
+};
+
+void foo(Single S) {
+  int A = S.One;
+}
+
+void fn() {
+  //expected-error@+1 {{HLSL doesn't support constructors or functional-style casts}}
+  Single S = Single();
+  //expected-error@+1 {{HLSL doesn't support constructors or functional-style casts}}
+  Single S2 = Single(1);
+  //expected-error@+1 {{HLSL doesn't support constructors or functional-style casts}}
+  Single S3 = Single(S);
+  //expected-error@+1 {{HLSL doesn't support constructors or functional-style casts}}
+  foo(Single(1));
+  //expected-error@+1 {{HLSL doesn't support constructors or functional-style casts}}
+  Single S4 = Single(1,2);
+  //expected-error@+1 {{HLSL doesn't support constructors or functional-style casts}}
+  Single S5 = {Single(1)};
+  //expected-error@+3 {{HLSL doesn't support constructors or functional-style casts}}
+  //expected-error@+2 {{HLSL doesn't support constructors or functional-style casts}}
+  //expected-error@+1 {{too many initializers in list for type 'Single' (expected 1 but found 2)}}
+  Single S6 = {Single(1), Single(2)};
+}

>From e5c81bac2fb698bcf608dfd1cde5de0b27881488 Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall at microsoft.com>
Date: Wed, 15 Apr 2026 09:02:37 -0700
Subject: [PATCH 9/9] update how assignment works + add new tests + update
 existing tests

---
 clang/lib/Sema/SemaExpr.cpp                   | 14 +--
 .../BasicFeatures/OutputArguments.hlsl        | 22 ++---
 clang/test/CodeGenHLSL/StructAssign.hlsl      | 87 +++++++++++++++++++
 .../groupsharedArgs/StructTest.hlsl           | 10 +--
 .../StructuredBuffers-subscripts.hlsl         | 10 +--
 clang/test/CodeGenHLSL/this-assignment.hlsl   | 28 +++---
 .../Language/StructAssign-errors.hlsl         | 29 +++++++
 .../test/SemaHLSL/Language/StructAssign.hlsl  | 45 ++++++++++
 8 files changed, 186 insertions(+), 59 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/StructAssign.hlsl
 create mode 100644 clang/test/SemaHLSL/Language/StructAssign-errors.hlsl
 create mode 100644 clang/test/SemaHLSL/Language/StructAssign.hlsl

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5390c425faa4c..af103b0160c92 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10329,7 +10329,7 @@ AssignConvertType Sema::CheckSingleAssignmentConstraints(QualType LHSType,
     }
 
     if (ConvertRHS)
-      RHS = ImpCastExprToType(E, Ty, Kind);
+      RHS = ImpCastExprToType(E, Ty, Kind, E->getValueKind());
   }
 
   return result;
@@ -15533,18 +15533,6 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
 
   switch (Opc) {
   case BO_Assign:
-    // If this is HLSL and the LHS is a record try to perform aggregate
-    // initialization.
-    if (getLangOpts().HLSL && LHSExpr->getType()->isRecordType()) {
-      ResultTy = LHSExpr->getType();
-      ExprResult Init =
-          HLSL().tryAggregateInitialization(*this, ResultTy, RHSExpr);
-      if (Init.isInvalid())
-        return Init;
-      RHS = Init.get();
-      break;
-    }
-
     ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType(), Opc);
     if (getLangOpts().CPlusPlus &&
         LHS.get()->getObjectKind() != OK_ObjCProperty) {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index f66d39f5eec11..a85bffc0db6d6 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -161,14 +161,15 @@ void init(out S s) {
 // CHECK: [[S:%.*]] = alloca %struct.S
 // CHECK: [[Tmp:%.*]] = alloca %struct.S
 // CHECK: call void {{.*}}init{{.*}}(ptr noalias noundef nonnull align 1 dereferenceable(8) [[Tmp]])
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[Tmp]], i32 8, i1 false)
 // CHECK: [[X:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 0
-// CHECK: [[X1:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[Tmp]], i32 0, i32 0
-// CHECK: [[X2:%.*]] = load i32, ptr [[X1]], align 1
-// CHECK: store i32 [[X2]], ptr [[X]], align 1
+// CHECK: [[Z:%.*]] = load i32, ptr [[X]], align 1
+// CHECK: [[Conv:%.*]] = sitofp i32 [[Z]] to float
 // CHECK: [[Y:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 1
-// CHECK: [[Y2:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[Tmp]], i32 0, i32 1
-// CHECK: [[Y3:%.*]] = load float, ptr [[Y2]], align 1
-// CHECK: store float [[Y3]], ptr [[Y]], align 1
+// CHECK: [[Y2:%.*]] = load float, ptr [[Y]], align 1
+// CHECK: [[Add:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[Conv]], [[Y2]]
+// CHECK: [[Conv2:%.*]] = fptosi float [[Add]] to i32
+// CHECK: ret i32 [[Conv2]]
 
 // OPT: ret i32 7
 export int case6() {
@@ -203,14 +204,7 @@ void init(inout R s) {
 // CHECK: store float [[Y3]], ptr [[Y]], align 1
 
 // CHECK: call void {{.*}}init{{.*}}(ptr noalias noundef nonnull align 1 dereferenceable(8) [[Tmp]])
-// CHECK: [[X:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[S]], i32 0, i32 0
-// CHECK: [[X2:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[Tmp]], i32 0, i32 0
-// CHECK: [[X3:%.*]] = load i32, ptr [[X2]], align 1
-// CHECK: store i32 [[X3]], ptr [[X]], align 1
-// CHECK: [[Y:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[S]], i32 0, i32 1
-// CHECK: [[Y2:%.*]] = getelementptr inbounds nuw %struct.R, ptr [[Tmp]], i32 0, i32 1
-// CHECK: [[Y3:%.*]] = load float, ptr [[Y2]], align 1
-// CHECK: store float [[Y3]], ptr [[Y]], align 1
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 [[Tmp]], i32 8, i1 false)
 
 // OPT: ret i32 7
 export int case7() {
diff --git a/clang/test/CodeGenHLSL/StructAssign.hlsl b/clang/test/CodeGenHLSL/StructAssign.hlsl
new file mode 100644
index 0000000000000..ceb6ecfec78ae
--- /dev/null
+++ b/clang/test/CodeGenHLSL/StructAssign.hlsl
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+struct Base {
+  int A, B;
+};
+
+struct Derived: Base {
+  float F, G;
+};
+
+struct Other {
+  int C, D;
+};
+
+export void fn() {
+// CHECK: [[B:%.*]] = alloca %struct.Base, align 1
+// CHECK-NEXT: [[C:%.*]]  = alloca %struct.Base, align 1
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Base, align 1
+// CHECK-NEXT: [[O:%.*]] = alloca %struct.Other, align 1
+// CHECK-NEXT: [[Tmp1:%.*]] = alloca %struct.Base, align 1
+// CHECK-NEXT: [[AggTmp:%.*]] = alloca %struct.Other, align 1
+// CHECK-NEXT: [[I2:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT: [[Tmp5:%.*]] = alloca %struct.Base, align 1
+// CHECK-NEXT: [[D:%.*]] = alloca %struct.Derived, align 1
+// CHECK-NEXT: [[Tmp9:%.*]] = alloca %struct.Base, align 1
+// CHECK-NEXT: [[AggTmp10:%.*]] = alloca %struct.Derived, align 1
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[B]], ptr align 1 @__const._Z2fnv.B, i32 8, i1 false)
+  Base B = {1,2};
+  
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[C]], ptr align 1 @__const._Z2fnv.C, i32 8, i1 false)
+  Base C = {5,6};
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[B]], ptr align 1 [[C]], i32 8, i1 false)
+// These Tmp assignments are the "result" of the assignment being stored...
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[B]], i32 8, i1 false)
+  B = C;
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[O]], ptr align 1 @__const._Z2fnv.O, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AggTmp]], ptr align 1 [[O]], i32 8, i1 false)
+  Other O = {7,8};
+
+// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds %struct.Base, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds %struct.Base, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds %struct.Other, ptr [[AggTmp]], i32 0, i32 0
+// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds %struct.Other, ptr [[AggTmp]], i32 0, i32 1
+// CHECK-NEXT: [[A3:%.*]] = load i32, ptr [[A2]], align 4
+// CHECK-NEXT: store i32 [[A3]], ptr [[A1]], align 4
+// CHECK-NEXT: [[B3:%.*]] = load i32, ptr [[B2]], align 4
+// CHECK-NEXT: store i32 [[B3]], ptr [[B1]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp1]], ptr align 1 [[C]], i32 8, i1 false)
+// ^ "result" of the assignment is C so it gets stored to tmp1
+  C = (Base)O;
+
+// CHECK-NEXT: store <2 x i32> <i32 9, i32 10>, ptr [[I2]], align 4
+  int2 I2 = {9,10};
+
+// CHECK-NEXT: [[I3:%.*]] = load <2 x i32>, ptr [[I2]], align 4
+// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds %struct.Base, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds %struct.Base, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[L0:%.*]] = extractelement <2 x i32> [[I3]], i64 0
+// CHECK-NEXT: store i32 [[L0]], ptr [[A1]], align 4
+// CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[I3]], i64 1
+// CHECK-NEXT: store i32 [[L1]], ptr [[B1]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp5]], ptr align 1 [[C]], i32 8, i1 false)
+// ^ the result of the assignment is C so it gets stored to tmp5
+  C = (Base)I2;
+
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[D]], ptr align 1 @__const._Z2fnv.D, i32 16, i1 false)
+  Derived D = {1,2,3,4};
+
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AggTmp10]], ptr align 1 [[D]], i32 16, i1 false)
+// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds %struct.Base, ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds %struct.Base, ptr [[B]], i32 0, i32 1
+// CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AggTmp10]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AggTmp10]], i32 0, i32 0, i32 1
+// CHECK-NEXT: [[F1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AggTmp10]], i32 0, i32 1
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.Derived, ptr [[AggTmp10]], i32 0, i32 2
+// CHECK-NEXT: [[A3:%.*]] = load i32, ptr [[A2]], align 4
+// CHECK-NEXT: store i32 [[A3]], ptr [[A1]], align 4
+// CHECK-NEXT: [[B3:%.*]] = load i32, ptr [[B2]], align 4
+// CHECK-NEXT: store i32 [[B3]], ptr [[B1]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp9]], ptr align 1 [[B]], i32 8, i1 false)
+// ^ the result of the assignment is B so it gets stored to tmp9
+  B = (Base)D;
+}
diff --git a/clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl b/clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl
index a43381db4d3b2..4c6f0f434acf8 100644
--- a/clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl
+++ b/clang/test/CodeGenHLSL/groupsharedArgs/StructTest.hlsl
@@ -35,12 +35,11 @@ void fn1(groupshared Shared Sh) {
   Sh.Arr[1] = D;
 }
 
-
 // CHECK-LABEL: define internal void @_Z4mainDv3_j(<3 x i32> noundef %TID)
 [numthreads(4, 1, 1)]
 void main(uint3 TID : SV_GroupThreadID) {
 // CHECK: [[SAddr:%.*]] = alloca %struct.Shared, align 1
-// CHECK: call void @_Z3fn1RU3AS36Shared(ptr addrspace(3) noundef align 1 dereferenceable(16) [[SharedData]]) #3
+// CHECK: call void @_Z3fn1RU3AS36Shared(ptr addrspace(3) noundef align 1 dereferenceable(16) [[SharedData]])
   fn1(SharedData);
 
 // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw %struct.Shared, ptr [[SAddr]], i32 0, i32 0
@@ -54,11 +53,6 @@ void main(uint3 TID : SV_GroupThreadID) {
 // CHECK-NEXT: store double [[Arr2]], ptr [[Arr]], align 1
   Shared S = SharedData;
 
-// CHECK-NEXT: [[ASD:%.*]] = load i32, ptr addrspace(3) [[SharedData]], align 1
-// CHECK-NEXT: store i32 [[ASD]], ptr addrspace(3) [[SharedData2]], align 1
-// CHECK-NEXT: [[FSD:%.*]] = load float, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData]], i32 4), align 1
-// CHECK-NEXT: store float [[FSD]], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData2]], i32 4), align 1
-// CHECK-NEXT: [[ArrSD:%.*]] = load double, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData]], i32 8), align 1
-// CHECK-NEXT: store double [[ArrSD]], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) [[SharedData2]], i32 8), align 1
+// CHECK-NEXT: call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 1 @SharedData2, ptr addrspace(3) align 1 @SharedData, i32 16, i1 false)
   SharedData2 = SharedData;
 }
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
index 3128aa2193954..f7f8676cf5d85 100644
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-subscripts.hlsl
@@ -36,15 +36,13 @@ void main(unsigned GI : SV_GroupIndex) {
 
   // SPV: %[[OUTPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
   // SPV: %[[INPTR:.*]] = call noundef align 1 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.Ss_12_1t.i32(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 1) %{{.*}}, i32 %{{.*}})
-  // SPV: call void @llvm.memcpy.p0.p11.i64(ptr align 1 %ref.tmp.i, ptr addrspace(11) align 1 %[[INPTR]], i64 4, i1 false)
-  // SPV: %[[L56:.*]] = load float, ptr %ref.tmp.i, align 1
-  // SPV: store float %[[L56]], ptr addrspace(11) %[[OUTPTR]]
+  // SPV: call void @llvm.memcpy.p11.p11.i64(ptr addrspace(11) align 1 %[[OUTPTR]], ptr addrspace(11) align 1 %[[INPTR]], i64 4, i1 false)
+  // SPV: call void @llvm.memcpy.p0.p11.i64(ptr align 1 %tmp.i, ptr addrspace(11) align 1 %[[OUTPTR]], i64 4, i1 false)
 
   // For DXIL, hlsl_device and the default address space map to the same target address space. No need for an address space cast.
   // DXIL: %[[OUTPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
   // DXIL: %[[INPTR:.*]] = call noundef nonnull align 1 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t.i32(target("dx.RawBuffer", %struct.S, 1, 0) %{{.*}}, i32 %{{.*}})
-  // DXIL: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %ref.tmp.i, ptr align 1 %[[INPTR]], i32 4, i1 false)
-  // DXIL: %[[L70:.*]] = load float, ptr %ref.tmp.i, align 1
-  // DXIL: store float %[[L70]], ptr %[[OUTPTR]], align 1
+  // DXIL: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %[[OUTPTR]], ptr align 1 %[[INPTR]], i32 4, i1 false)
+  // DXIL: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %tmp.i, ptr align 1 %[[OUTPTR]], i32 4, i1 false)
   RWSB3[0] = RWSB3[1];
 }
diff --git a/clang/test/CodeGenHLSL/this-assignment.hlsl b/clang/test/CodeGenHLSL/this-assignment.hlsl
index 51041dd087944..2c1c6cdd9a48d 100644
--- a/clang/test/CodeGenHLSL/this-assignment.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment.hlsl
@@ -36,30 +36,27 @@ void main() {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%Another = alloca %struct.Pair, align 1
+// CHECK-NEXT:%tmp = alloca %struct.Pair, align 1
 // CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
 // CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
 // CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %Another, ptr align 1 @__const._ZN4Pair8getFirstEv.Another, i32 8, i1 false)
+// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %this1, ptr align 1 %Another, i32 8, i1 false)
+// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %this1, i32 8, i1 false)
 // CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
-// CHECK-NEXT:%First2 = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 0
-// CHECK-NEXT:%0 = load i32, ptr %First2, align 1
-// CHECK-NEXT:store i32 %0, ptr %First, align 1
-// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
-// CHECK-NEXT:%Second3 = getelementptr inbounds nuw %struct.Pair, ptr %Another, i32 0, i32 1
-// CHECK-NEXT:%1 = load i32, ptr %Second3, align 1
-// CHECK-NEXT:store i32 %1, ptr %Second, align 1
-// CHECK-NEXT:%First4 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
-// CHECK-NEXT:%2 = load i32, ptr %First4, align 1
-// CHECK-NEXT:ret i32 %2
+// CHECK-NEXT:%0 = load i32, ptr %First, align 1
+// CHECK-NEXT:ret i32 %0
 
 // CHECK-LABEL:     define {{.*}}getSecond
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
+// CHECK-NEXT:%tmp = alloca %struct.Pair, align 1
 // CHECK-NEXT:store ptr %this, ptr %this.addr, align 4
 // CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4
 // CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
 // CHECK-NEXT:store i32 0, ptr %First, align 1
 // CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
 // CHECK-NEXT:store i32 0, ptr %Second, align 1
+// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %this1, i32 8, i1 false)
 // CHECK-NEXT:%Second2 = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
 // CHECK-NEXT:%0 = load i32, ptr %Second2, align 1
 // CHECK-NEXT:ret i32 %0
@@ -69,18 +66,13 @@ void main() {
 // CHECK-NEXT: [[ResultPtr:%.*]] = alloca ptr
 // CHECK-NEXT: [[ThisPtrAddr:%.*]] = alloca ptr
 // CHECK-NEXT: [[ObjIndirectAddr:%.*]] = alloca ptr
+// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.Pair, align 1
 // CHECK-NEXT: store ptr %agg.result, ptr [[ResultPtr]]
 // CHECK-NEXT: store ptr {{.*}}, ptr [[ThisPtrAddr]]
 // CHECK-NEXT: store ptr %Obj, ptr [[ObjIndirectAddr]]  
 // CHECK-NEXT: [[ThisPtr:%.*]] = load ptr, ptr [[ThisPtrAddr]]
-// CHECK-NEXT: [[FirstAddr:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
-// CHECK-NEXT: [[FirstAddr2:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Obj, i32 0, i32 0
-// CHECK-NEXT: [[Z:%.*]] = load i32, ptr [[FirstAddr2]]
-// CHECK-NEXT: store i32 [[Z]], ptr [[FirstAddr]], align 1
-// CHECK-NEXT: [[Second:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 1
-// CHECK-NEXT: [[Second3:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr %Obj, i32 0, i32 1
-// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[Second3]], align 1
-// CHECK-NEXT: store i32 [[Y]], ptr [[Second]], align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[ThisPtr]], ptr align 1 %Obj, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[Tmp]], ptr align 1 [[ThisPtr]], i32 8, i1 false)
 // CHECK-NEXT: [[First4:%.*]] = getelementptr inbounds nuw %struct.Pair, ptr [[ThisPtr]], i32 0, i32 0
 // CHECK-NEXT: [[X:%.*]] = load i32, ptr [[First4]], align 1
 // CHECK-NEXT: [[Add:%.*]] = add nsw i32 [[X]], 2
diff --git a/clang/test/SemaHLSL/Language/StructAssign-errors.hlsl b/clang/test/SemaHLSL/Language/StructAssign-errors.hlsl
new file mode 100644
index 0000000000000..d43774d94139e
--- /dev/null
+++ b/clang/test/SemaHLSL/Language/StructAssign-errors.hlsl
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -verify
+
+struct Base {
+  int A, B;
+};
+
+struct Derived: Base {
+  float F, G;
+};
+
+struct Other {
+  int C, D;
+};
+
+export void fn() {
+  Base C = {5,6};
+
+  Other O = {7,8};
+  // expected-error@+1{{assigning to 'Base' from incompatible type 'Other'}}
+  C = O;
+
+  int2 I2 = {9,10};
+  // expected-error@+1{{assigning to 'Base' from incompatible type 'int2' (aka 'vector<int, 2>')}}
+  C = I2;
+
+  Derived D = {1,2,3,4};
+  // expected-error@+1{{assigning to 'Base' from incompatible type 'Derived'}}
+  C = D;
+}
diff --git a/clang/test/SemaHLSL/Language/StructAssign.hlsl b/clang/test/SemaHLSL/Language/StructAssign.hlsl
new file mode 100644
index 0000000000000..2ddd2aa056032
--- /dev/null
+++ b/clang/test/SemaHLSL/Language/StructAssign.hlsl
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header %s -ast-dump | FileCheck %s
+
+struct Base {
+  int A, B;
+};
+
+struct Derived: Base {
+  float F, G;
+};
+
+struct Other {
+  int C, D;
+};
+
+// CHECK-LABEL: FunctionDecl {{.*}} fn
+export void fn() {
+  Base B = {1,2};
+  Base C = {5,6};
+// CHECK: BinaryOperator {{.*}} 'Base' lvalue '='
+// CHECK-NEXT: DeclRefExpr {{.*}} 'Base' lvalue Var {{.*}} 'B' 'Base'
+// CHECK-NEXT: DeclRefExpr {{.*}} 'Base' lvalue Var {{.*}} 'C' 'Base'
+  B = C;
+
+  Other O = {7,8};
+// CHECK: BinaryOperator {{.*}} 'Base' lvalue '='
+// CHECK-NEXT: DeclRefExpr {{.*}} 'Base' lvalue Var {{.*}} 'C' 'Base'
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'Base' <HLSLElementwiseCast>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'Other' lvalue Var {{.*}} 'O' 'Other'
+  C = (Base)O;
+
+  int2 I2 = {9,10};
+// CHECK: BinaryOperator {{.*}} 'Base' lvalue '='
+// CHECK-NEXT: DeclRefExpr {{.*}} 'Base' lvalue Var {{.*}} 'C' 'Base'
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'Base' <HLSLElementwiseCast>
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector<int, 2>' <LValueToRValue> part_of_explicit_cast
+// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector<int, 2>' lvalue Var {{.*}} 'I2' 'int2':'vector<int, 2>'
+  C = (Base)I2;
+
+  Derived D = {1,2,3,4};
+// CHECK: BinaryOperator {{.*}} 'Base' lvalue '='
+// CHECK-NEXT: DeclRefExpr {{.*}} 'Base' lvalue Var {{.*}} 'B' 'Base'
+// CHECK-NEXT: CStyleCastExpr {{.*}} 'Base' <HLSLElementwiseCast>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'Derived' lvalue Var {{.*}} 'D' 'Derived'
+  B = (Base)D;
+}



More information about the cfe-commits mailing list