[clang] [HLSL][Matrix] introduce MatrixElementExpr as accessor and swizzle operator (PR #171225)

Farzon Lotfi via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 22 08:39:25 PST 2026


https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/171225

>From f25e666d32b90432f0c19465c3e9a12dd89a2ab8 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Sun, 7 Dec 2025 19:07:17 -0500
Subject: [PATCH 1/4] [HLSL][Matrix] Add Matrix Swizzle AST Node

fixes #159438
---
 clang/include/clang/AST/ComputeDependence.h   |   2 +
 clang/include/clang/AST/Expr.h                | 100 +++--
 clang/include/clang/AST/RecursiveASTVisitor.h |   1 +
 clang/include/clang/AST/TextNodeDumper.h      |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td        |  12 +
 clang/include/clang/Basic/StmtNodes.td        |   1 +
 clang/include/clang/Sema/SemaHLSL.h           |   4 +
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/AST/ByteCode/Compiler.cpp           |   6 +
 clang/lib/AST/ByteCode/Compiler.h             |   1 +
 clang/lib/AST/ComputeDependence.cpp           |   4 +
 clang/lib/AST/Expr.cpp                        | 126 ++++++-
 clang/lib/AST/ExprClassification.cpp          |  15 +
 clang/lib/AST/ExprConstant.cpp                |   8 +
 clang/lib/AST/ItaniumMangle.cpp               |   1 +
 clang/lib/AST/StmtPrinter.cpp                 |   6 +
 clang/lib/AST/StmtProfile.cpp                 |   5 +
 clang/lib/AST/TextNodeDumper.cpp              |   4 +
 clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp    |   2 +
 clang/lib/CIR/CodeGen/CIRGenFunction.cpp      |   2 +
 clang/lib/CIR/CodeGen/CIRGenFunction.h        |   2 +
 clang/lib/CodeGen/CGExpr.cpp                  |  47 +++
 clang/lib/CodeGen/CGExprScalar.cpp            |   1 +
 clang/lib/CodeGen/CodeGenFunction.h           |   1 +
 clang/lib/Sema/SemaExceptionSpec.cpp          |   1 +
 clang/lib/Sema/SemaExpr.cpp                   |   3 +
 clang/lib/Sema/SemaExprMember.cpp             |  17 +
 clang/lib/Sema/SemaHLSL.cpp                   | 199 ++++++++++
 clang/lib/Sema/TreeTransform.h                |  31 +-
 clang/lib/Serialization/ASTReaderStmt.cpp     |  11 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |   8 +
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |   1 +
 .../AST/HLSL/matrix-member-access-scalar.hlsl |  38 ++
 ...x-member-access-swizzle-ast-dump-json.hlsl |  25 ++
 ...atrix-member-access-swizzle-ast-print.hlsl |  21 ++
 .../HLSL/matrix-member-access-swizzle.hlsl    |  49 +++
 .../pch_with_matrix_element_accessor.hlsl     |  26 ++
 ...member-one-based-accessor-scalar-load.hlsl | 230 ++++++++++++
 ...ember-one-based-accessor-scalar-store.hlsl | 345 ++++++++++++++++++
 .../matrix-member-one-based-swizzle-load.hlsl | 108 ++++++
 ...matrix-member-one-based-swizzle-store.hlsl | 230 ++++++++++++
 ...ember-zero-based-accessor-scalar-load.hlsl | 230 ++++++++++++
 ...mber-zero-based-accessor-scalar-store.hlsl | 345 ++++++++++++++++++
 ...matrix-member-zero-based-swizzle-load.hlsl | 108 ++++++
 ...atrix-member-zero-based-swizzle-store.hlsl | 230 ++++++++++++
 .../SemaHLSL/matrix-member-access-errors.hlsl |  28 ++
 clang/tools/libclang/CXCursor.cpp             |   1 +
 47 files changed, 2600 insertions(+), 40 deletions(-)
 create mode 100644 clang/test/AST/HLSL/matrix-member-access-scalar.hlsl
 create mode 100644 clang/test/AST/HLSL/matrix-member-access-swizzle-ast-dump-json.hlsl
 create mode 100644 clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl
 create mode 100644 clang/test/AST/HLSL/matrix-member-access-swizzle.hlsl
 create mode 100644 clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-load.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-store.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-load.hlsl
 create mode 100644 clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-store.hlsl
 create mode 100644 clang/test/SemaHLSL/matrix-member-access-errors.hlsl

diff --git a/clang/include/clang/AST/ComputeDependence.h b/clang/include/clang/AST/ComputeDependence.h
index 895105640b931..3a3c86842501a 100644
--- a/clang/include/clang/AST/ComputeDependence.h
+++ b/clang/include/clang/AST/ComputeDependence.h
@@ -45,6 +45,7 @@ class ArrayInitLoopExpr;
 class ImplicitValueInitExpr;
 class InitListExpr;
 class ExtVectorElementExpr;
+class MatrixElementExpr;
 class BlockExpr;
 class AsTypeExpr;
 class DeclRefExpr;
@@ -135,6 +136,7 @@ ExprDependence computeDependence(ArrayInitLoopExpr *E);
 ExprDependence computeDependence(ImplicitValueInitExpr *E);
 ExprDependence computeDependence(InitListExpr *E);
 ExprDependence computeDependence(ExtVectorElementExpr *E);
+ExprDependence computeDependence(MatrixElementExpr *E);
 ExprDependence computeDependence(BlockExpr *E,
                                  bool ContainsUnexpandedParameterPack);
 ExprDependence computeDependence(AsTypeExpr *E);
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 3e30a8b420f19..676f59c7c104b 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -291,6 +291,7 @@ class Expr : public ValueStmt {
     LV_NotObjectType,
     LV_IncompleteVoidType,
     LV_DuplicateVectorComponents,
+    LV_DuplicateMatrixComponents,
     LV_InvalidExpression,
     LV_InvalidMessageExpression,
     LV_MemberFunction,
@@ -306,6 +307,7 @@ class Expr : public ValueStmt {
     MLV_NotObjectType,
     MLV_IncompleteVoidType,
     MLV_DuplicateVectorComponents,
+    MLV_DuplicateMatrixComponents,
     MLV_InvalidExpression,
     MLV_LValueCast,           // Specialized form of MLV_InvalidExpression.
     MLV_IncompleteType,
@@ -344,6 +346,7 @@ class Expr : public ValueStmt {
       CL_Void, // Void cannot be an lvalue in C.
       CL_AddressableVoid, // Void expression whose address can be taken in C.
       CL_DuplicateVectorComponents, // A vector shuffle with dupes.
+      CL_DuplicateMatrixComponents, // A matrix shuffle with dupes.
       CL_MemberFunction, // An expression referring to a member function
       CL_SubObjCPropertySetting,
       CL_ClassTemporary, // A temporary of class type, or subobject thereof.
@@ -6554,30 +6557,24 @@ class GenericSelectionExpr final
 // Clang Extensions
 //===----------------------------------------------------------------------===//
 
-/// ExtVectorElementExpr - This represents access to specific elements of a
-/// vector, and may occur on the left hand side or right hand side.  For example
-/// the following is legal:  "V.xy = V.zw" if V is a 4 element extended vector.
-///
-/// Note that the base may have either vector or pointer to vector type, just
-/// like a struct field reference.
-///
-class ExtVectorElementExpr : public Expr {
+template <class Derived> class ElementAccessExprBase : public Expr {
+protected:
   Stmt *Base;
   IdentifierInfo *Accessor;
   SourceLocation AccessorLoc;
-public:
-  ExtVectorElementExpr(QualType ty, ExprValueKind VK, Expr *base,
-                       IdentifierInfo &accessor, SourceLocation loc)
-      : Expr(ExtVectorElementExprClass, ty, VK,
-             (VK == VK_PRValue ? OK_Ordinary : OK_VectorComponent)),
-        Base(base), Accessor(&accessor), AccessorLoc(loc) {
-    setDependence(computeDependence(this));
+
+  ElementAccessExprBase(StmtClass SC, QualType Ty, ExprValueKind VK, Expr *Base,
+                        IdentifierInfo &Accessor, SourceLocation Loc,
+                        ExprObjectKind OK)
+      : Expr(SC, Ty, VK, OK), Base(Base), Accessor(&Accessor),
+        AccessorLoc(Loc) {
+    setDependence(computeDependence(static_cast<Derived *>(this)));
   }
 
-  /// Build an empty vector element expression.
-  explicit ExtVectorElementExpr(EmptyShell Empty)
-    : Expr(ExtVectorElementExprClass, Empty) { }
+  explicit ElementAccessExprBase(StmtClass SC, EmptyShell Empty)
+      : Expr(SC, Empty) {}
 
+public:
   const Expr *getBase() const { return cast<Expr>(Base); }
   Expr *getBase() { return cast<Expr>(Base); }
   void setBase(Expr *E) { Base = E; }
@@ -6588,22 +6585,45 @@ class ExtVectorElementExpr : public Expr {
   SourceLocation getAccessorLoc() const { return AccessorLoc; }
   void setAccessorLoc(SourceLocation L) { AccessorLoc = L; }
 
-  /// getNumElements - Get the number of components being selected.
-  unsigned getNumElements() const;
-
-  /// containsDuplicateElements - Return true if any element access is
-  /// repeated.
-  bool containsDuplicateElements() const;
-
-  /// getEncodedElementAccess - Encode the elements accessed into an llvm
-  /// aggregate Constant of ConstantInt(s).
-  void getEncodedElementAccess(SmallVectorImpl<uint32_t> &Elts) const;
-
   SourceLocation getBeginLoc() const LLVM_READONLY {
     return getBase()->getBeginLoc();
   }
   SourceLocation getEndLoc() const LLVM_READONLY { return AccessorLoc; }
 
+  /*static bool classof(const Stmt *T) {
+    return T->getStmtClass() == ExtVectorElementExprClass ||
+           T->getStmtClass() == MatrixElementExprClass;
+  }*/
+
+  child_range children() { return child_range(&Base, &Base + 1); }
+  const_child_range children() const {
+    return const_child_range(&Base, &Base + 1);
+  }
+};
+
+/// ExtVectorElementExpr - This represents access to specific elements of a
+/// vector, and may occur on the left hand side or right hand side.  For example
+/// the following is legal:  "V.xy = V.zw" if V is a 4 element extended vector.
+///
+/// Note that the base may have either vector or pointer to vector type, just
+/// like a struct field reference.
+///
+class ExtVectorElementExpr
+    : public ElementAccessExprBase<ExtVectorElementExpr> {
+public:
+  ExtVectorElementExpr(QualType Ty, ExprValueKind VK, Expr *Base,
+                       IdentifierInfo &Accessor, SourceLocation Loc)
+      : ElementAccessExprBase(
+            ExtVectorElementExprClass, Ty, VK, Base, Accessor, Loc,
+            (VK == VK_PRValue ? OK_Ordinary : OK_VectorComponent)) {}
+
+  explicit ExtVectorElementExpr(EmptyShell Empty)
+      : ElementAccessExprBase(ExtVectorElementExprClass, Empty) {}
+
+  unsigned getNumElements() const;
+  bool containsDuplicateElements() const;
+  void getEncodedElementAccess(SmallVectorImpl<uint32_t> &Elts) const;
+
   /// isArrow - Return true if the base expression is a pointer to vector,
   /// return false if the base expression is a vector.
   bool isArrow() const;
@@ -6611,11 +6631,25 @@ class ExtVectorElementExpr : public Expr {
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == ExtVectorElementExprClass;
   }
+};
 
-  // Iterators
-  child_range children() { return child_range(&Base, &Base+1); }
-  const_child_range children() const {
-    return const_child_range(&Base, &Base + 1);
+class MatrixElementExpr : public ElementAccessExprBase<MatrixElementExpr> {
+public:
+  MatrixElementExpr(QualType Ty, ExprValueKind VK, Expr *Base,
+                    IdentifierInfo &Accessor, SourceLocation Loc)
+      : ElementAccessExprBase(
+            MatrixElementExprClass, Ty, VK, Base, Accessor, Loc,
+            OK_Ordinary /*TODO: Should we add a new OK_MatrixComponent?*/) {}
+
+  explicit MatrixElementExpr(EmptyShell Empty)
+      : ElementAccessExprBase(MatrixElementExprClass, Empty) {}
+
+  unsigned getNumElements() const;
+  bool containsDuplicateElements() const;
+  void getEncodedElementAccess(SmallVectorImpl<uint32_t> &Elts) const;
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == MatrixElementExprClass;
   }
 };
 
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index c416625ad64fd..7368a6e95b479 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2942,6 +2942,7 @@ DEF_TRAVERSE_STMT(UserDefinedLiteral, {})
 DEF_TRAVERSE_STMT(DesignatedInitExpr, {})
 DEF_TRAVERSE_STMT(DesignatedInitUpdateExpr, {})
 DEF_TRAVERSE_STMT(ExtVectorElementExpr, {})
+DEF_TRAVERSE_STMT(MatrixElementExpr, {})
 DEF_TRAVERSE_STMT(GNUNullExpr, {})
 DEF_TRAVERSE_STMT(ImplicitValueInitExpr, {})
 DEF_TRAVERSE_STMT(NoInitExpr, {})
diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h
index 88ecd526e3d7e..ab828be124b0b 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -286,6 +286,7 @@ class TextNodeDumper
   void VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *Node);
   void VisitMemberExpr(const MemberExpr *Node);
   void VisitExtVectorElementExpr(const ExtVectorElementExpr *Node);
+  void VisitMatrixElementExpr(const MatrixElementExpr *Node);
   void VisitBinaryOperator(const BinaryOperator *Node);
   void VisitCompoundAssignOperator(const CompoundAssignOperator *Node);
   void VisitAddrLabelExpr(const AddrLabelExpr *Node);
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 44541a4c68197..c8b6519704747 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9357,6 +9357,8 @@ def err_typecheck_lvalue_casts_not_supported : Error<
 
 def err_typecheck_duplicate_vector_components_not_mlvalue : Error<
   "vector is not assignable (contains duplicate components)">;
+def err_typecheck_duplicate_matrix_components_not_mlvalue : Error<
+  "matrix is not assignable (contains duplicate components)">;
 def err_block_decl_ref_not_modifiable_lvalue : Error<
   "variable is not assignable (missing __block type specifier)">;
 def err_lambda_decl_ref_not_modifiable_lvalue : Error<
@@ -13106,6 +13108,7 @@ def err_builtin_matrix_stride_too_small: Error<
   "stride must be greater or equal to the number of rows">;
 def err_builtin_matrix_invalid_dimension: Error<
   "%0 dimension is outside the allowed range [1, %1]">;
+def err_builtin_matrix_invalid_member: Error<"invalid matrix member '%0' expected %1">;
 
 def warn_mismatched_import : Warning<
   "import %select{module|name}0 (%1) does not match the import %select{module|name}0 (%2) of the "
@@ -13377,6 +13380,15 @@ def err_hlsl_builtin_scalar_vector_mismatch
           "%select{all|second and third}0 arguments to %1 must be of scalar or "
           "vector type with matching scalar element type%diff{: $ vs $|}2,3">;
 
+def err_hlsl_matrix_element_not_in_bounds : Error<
+  "matrix %select{row|column}0 element accessor is out of bounds of %select{zero|one}1 based indexing">;
+
+def err_hlsl_matrix_index_out_of_bounds : Error<
+  "matrix %select{row|column}0 index %1 is out of bounds of %select{rows|columns}0 size %2">;
+
+def err_hlsl_matrix_swizzle_invalid_length : Error<
+  "matrix swizzle length must be between 1 and 4 but is %0">;
+
 def warn_hlsl_impcast_vector_truncation : Warning<
   "implicit conversion truncates vector: %0 to %1">, InGroup<VectorConversion>;
 
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index b08b9fe3b9271..6b67b2e3a8e93 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -93,6 +93,7 @@ def CStyleCastExpr : StmtNode<ExplicitCastExpr>;
 def OMPArrayShapingExpr : StmtNode<Expr>;
 def CompoundLiteralExpr : StmtNode<Expr>;
 def ExtVectorElementExpr : StmtNode<Expr>;
+def MatrixElementExpr : StmtNode<Expr>;
 def InitListExpr : StmtNode<Expr>;
 def DesignatedInitExpr : StmtNode<Expr>;
 def DesignatedInitUpdateExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index 99d8ed137b0c2..e7e5f4bba4088 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -215,6 +215,10 @@ class SemaHLSL : public SemaBase {
   bool transformInitList(const InitializedEntity &Entity, InitListExpr *Init);
   bool handleInitialization(VarDecl *VDecl, Expr *&Init);
   void deduceAddressSpace(VarDecl *Decl);
+  QualType CheckMatrixComponent(Sema &S, QualType baseType, ExprValueKind &VK,
+                                SourceLocation OpLoc,
+                                const IdentifierInfo *CompName,
+                                SourceLocation CompLoc);
 
 private:
   // HLSL resource type attributes need to be processed all at once.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5a86d540e5d0b..022639b335bb8 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1696,6 +1696,9 @@ enum StmtCode {
   /// An ExtVectorElementExpr record.
   EXPR_EXT_VECTOR_ELEMENT,
 
+  /// A MatrixElementExpr record.
+  EXPR_MATRIX_ELEMENT,
+
   /// An InitListExpr record.
   EXPR_INIT_LIST,
 
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 21f8db06919ed..8ca53af9ad108 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4116,6 +4116,12 @@ bool Compiler<Emitter>::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
   return true;
 }
 
+template <class Emitter>
+bool Compiler<Emitter>::VisitMatrixElementExpr(const MatrixElementExpr *Node) {
+  // TODO
+  return false;
+}
+
 template <class Emitter>
 bool Compiler<Emitter>::VisitExtVectorElementExpr(
     const ExtVectorElementExpr *E) {
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 1bd15c3d79563..8f9a2b16f8596 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -223,6 +223,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
   bool VisitConvertVectorExpr(const ConvertVectorExpr *E);
   bool VisitShuffleVectorExpr(const ShuffleVectorExpr *E);
   bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E);
+  bool VisitMatrixElementExpr(const MatrixElementExpr *E);
   bool VisitObjCBoxedExpr(const ObjCBoxedExpr *E);
   bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);
   bool VisitStmtExpr(const StmtExpr *E);
diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp
index 8429f17d26be5..34167eee8d8f2 100644
--- a/clang/lib/AST/ComputeDependence.cpp
+++ b/clang/lib/AST/ComputeDependence.cpp
@@ -256,6 +256,10 @@ ExprDependence clang::computeDependence(ExtVectorElementExpr *E) {
   return E->getBase()->getDependence();
 }
 
+ExprDependence clang::computeDependence(MatrixElementExpr *E) {
+  return E->getBase()->getDependence();
+}
+
 ExprDependence clang::computeDependence(BlockExpr *E,
                                         bool ContainsUnexpandedParameterPack) {
   auto D = toExprDependenceForImpliedType(E->getType()->getDependence());
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 616db5df23c5f..e8e2ff96cd84a 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -25,6 +25,7 @@
 #include "clang/AST/IgnoreExpr.h"
 #include "clang/AST/Mangle.h"
 #include "clang/AST/RecordLayout.h"
+#include "clang/AST/TypeBase.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceManager.h"
@@ -3802,6 +3803,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
   case BinaryConditionalOperatorClass:
   case CompoundLiteralExprClass:
   case ExtVectorElementExprClass:
+  case MatrixElementExprClass:
   case DesignatedInitExprClass:
   case DesignatedInitUpdateExprClass:
   case ArrayInitLoopExprClass:
@@ -4422,7 +4424,14 @@ unsigned ExtVectorElementExpr::getNumElements() const {
   return 1;
 }
 
-/// containsDuplicateElements - Return true if any element access is repeated.
+unsigned MatrixElementExpr::getNumElements() const {
+  if (const ConstantMatrixType *MT = getType()->getAs<ConstantMatrixType>())
+    return MT->getNumElementsFlattened();
+  return 1;
+}
+
+/// containsDuplicateElements - Return true if any Vector element access is
+/// repeated.
 bool ExtVectorElementExpr::containsDuplicateElements() const {
   // FIXME: Refactor this code to an accessor on the AST node which returns the
   // "type" of component access, and share with code below and in Sema.
@@ -4443,6 +4452,68 @@ bool ExtVectorElementExpr::containsDuplicateElements() const {
   return false;
 }
 
+/// containsDuplicateElements - Return true if any Matrix element access is
+/// repeated.
+bool MatrixElementExpr::containsDuplicateElements() const {
+  StringRef Comp = Accessor->getName();
+  assert(!Comp.empty() && Comp[0] == '_' && "invalid matrix accessor");
+
+  // Get the matrix type so we know bounds.
+  const ConstantMatrixType *MT =
+      getBase()->getType()->getAs<ConstantMatrixType>();
+  assert(MT && "MatrixElementExpr base must be a matrix type");
+
+  unsigned Rows = MT->getNumRows();
+  unsigned Cols = MT->getNumColumns();
+  unsigned Max = Rows * Cols;
+
+  // Zero-indexed: _mRC  (4 chars per component)
+  // One-indexed: _RC    (3 chars per component)
+  bool IsZeroIndexed = false;
+  unsigned ChunkLen = 0;
+
+  if (Comp.size() >= 2 && Comp[0] == '_' && Comp[1] == 'm') {
+    IsZeroIndexed = true;
+    ChunkLen = 4;
+  } else {
+    IsZeroIndexed = false;
+    ChunkLen = 3;
+  }
+
+  assert(ChunkLen && "unrecognized matrix swizzle format");
+  assert(Comp.size() % ChunkLen == 0 &&
+         "matrix swizzle accessor has invalid length");
+
+  // Track visited elements using real matrix size.
+  SmallVector<bool, 16> Seen(Max, false);
+
+  for (unsigned I = 0, e = Comp.size(); I < e; I += ChunkLen) {
+    unsigned Row = 0, Col = 0;
+
+    if (IsZeroIndexed) {
+      // Pattern: _mRC
+      assert(Comp[I] == '_' && Comp[I + 1] == 'm');
+      Row = Comp[I + 2] - '0'; // 0..(Rows-1)
+      Col = Comp[I + 3] - '0';
+    } else {
+      // Pattern: _RC
+      assert(Comp[I] == '_');
+      Row = (Comp[I + 1] - '1'); // 1..Rows (ie same as 0..Rows-1)
+      Col = (Comp[I + 2] - '1');
+    }
+
+    // Bounds check (Sema should enforce correctness, but we assert anyway)
+    assert(Row < Rows && Col < Cols && "matrix swizzle index out of bounds");
+
+    unsigned Index = Row * Cols + Col;
+    if (Seen[Index])
+      return true;
+
+    Seen[Index] = true;
+  }
+  return false;
+}
+
 /// getEncodedElementAccess - We encode the fields as a llvm ConstantArray.
 void ExtVectorElementExpr::getEncodedElementAccess(
     SmallVectorImpl<uint32_t> &Elts) const {
@@ -4476,6 +4547,59 @@ void ExtVectorElementExpr::getEncodedElementAccess(
   }
 }
 
+void MatrixElementExpr::getEncodedElementAccess(
+    SmallVectorImpl<uint32_t> &Elts) const {
+  StringRef Comp = Accessor->getName();
+  assert(!Comp.empty() && Comp[0] == '_' && "invalid matrix accessor");
+
+  const ConstantMatrixType *MT =
+      getBase()->getType()->getAs<ConstantMatrixType>();
+  assert(MT && "MatrixElementExpr base must be a matrix type");
+
+  [[maybe_unused]] unsigned Rows = MT->getNumRows(); // assert-only use
+  unsigned Cols = MT->getNumColumns();
+
+  // Zero-indexed: _mRC (4 chars per component: '_', 'm', row, col)
+  // One-indexed:  _RC  (3 chars per component: '_', row, col)
+  bool IsZeroIndexed = false;
+  unsigned ChunkLen = 0;
+
+  if (Comp.size() >= 2 && Comp[0] == '_' && Comp[1] == 'm') {
+    IsZeroIndexed = true;
+    ChunkLen = 4;
+  } else {
+    IsZeroIndexed = false;
+    ChunkLen = 3;
+  }
+
+  assert(ChunkLen != 0 && "unrecognized matrix swizzle format");
+  assert(Comp.size() % ChunkLen == 0 &&
+         "matrix swizzle accessor has invalid length");
+
+  for (unsigned i = 0, e = Comp.size(); i < e; i += ChunkLen) {
+    unsigned Row = 0, Col = 0;
+
+    if (IsZeroIndexed) {
+      // Pattern: _mRC
+      assert(Comp[i] == '_' && Comp[i + 1] == 'm' &&
+             "invalid zero-indexed matrix swizzle component");
+      Row = static_cast<unsigned>(Comp[i + 2] - '0'); // 0..Rows-1
+      Col = static_cast<unsigned>(Comp[i + 3] - '0'); // 0..Cols-1
+    } else {
+      // Pattern: _RC
+      assert(Comp[i] == '_' && "invalid one-indexed matrix swizzle component");
+      Row = static_cast<unsigned>(Comp[i + 1] - '1'); // 1..Rows -> 0..Rows-1
+      Col = static_cast<unsigned>(Comp[i + 2] - '1'); // 1..Cols -> 0..Cols-1
+    }
+
+    // Sema should have validated these, but assert here for sanity.
+    assert(Row < Rows && Col < Cols && "matrix swizzle index out of range");
+
+    unsigned Index = Row * Cols + Col;
+    Elts.push_back(Index);
+  }
+}
+
 ShuffleVectorExpr::ShuffleVectorExpr(const ASTContext &C, ArrayRef<Expr *> args,
                                      QualType Type, SourceLocation BLoc,
                                      SourceLocation RP)
diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp
index 9995d1b411c5b..c77df5e33d4b5 100644
--- a/clang/lib/AST/ExprClassification.cpp
+++ b/clang/lib/AST/ExprClassification.cpp
@@ -63,6 +63,7 @@ Cl Expr::ClassifyImpl(ASTContext &Ctx, SourceLocation *Loc) const {
   case Cl::CL_Void:
   case Cl::CL_AddressableVoid:
   case Cl::CL_DuplicateVectorComponents:
+  case Cl::CL_DuplicateMatrixComponents:
   case Cl::CL_MemberFunction:
   case Cl::CL_SubObjCPropertySetting:
   case Cl::CL_ClassTemporary:
@@ -372,6 +373,16 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
       return Cl::CL_LValue;
     return ClassifyInternal(Ctx, cast<ExtVectorElementExpr>(E)->getBase());
 
+  // Matrix element access is an lvalue unless there are duplicates
+  // in the shuffle expression.
+  case Expr::MatrixElementExprClass:
+    if (cast<MatrixElementExpr>(E)->containsDuplicateElements())
+      return Cl::CL_DuplicateMatrixComponents;
+    // NOTE: MatrixElementExpr is currently only used by HLSL which does not
+    // have pointers so there is no isArrow() necessary or way to test
+    // Cl::CL_LValue
+    return ClassifyInternal(Ctx, cast<MatrixElementExpr>(E)->getBase());
+
     // Simply look at the actual default argument.
   case Expr::CXXDefaultArgExprClass:
     return ClassifyInternal(Ctx, cast<CXXDefaultArgExpr>(E)->getExpr());
@@ -738,6 +749,8 @@ Expr::LValueClassification Expr::ClassifyLValue(ASTContext &Ctx) const {
   case Cl::CL_Void: return LV_InvalidExpression;
   case Cl::CL_AddressableVoid: return LV_IncompleteVoidType;
   case Cl::CL_DuplicateVectorComponents: return LV_DuplicateVectorComponents;
+  case Cl::CL_DuplicateMatrixComponents:
+    return LV_DuplicateMatrixComponents;
   case Cl::CL_MemberFunction: return LV_MemberFunction;
   case Cl::CL_SubObjCPropertySetting: return LV_SubObjCPropertySetting;
   case Cl::CL_ClassTemporary: return LV_ClassTemporary;
@@ -759,6 +772,8 @@ Expr::isModifiableLvalue(ASTContext &Ctx, SourceLocation *Loc) const {
   case Cl::CL_Void: return MLV_InvalidExpression;
   case Cl::CL_AddressableVoid: return MLV_IncompleteVoidType;
   case Cl::CL_DuplicateVectorComponents: return MLV_DuplicateVectorComponents;
+  case Cl::CL_DuplicateMatrixComponents:
+    return MLV_DuplicateMatrixComponents;
   case Cl::CL_MemberFunction: return MLV_MemberFunction;
   case Cl::CL_SubObjCPropertySetting: return MLV_SubObjCPropertySetting;
   case Cl::CL_ClassTemporary: return MLV_ClassTemporary;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 2a228d2896730..ed8d795728343 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -8995,6 +8995,8 @@ class ExprEvaluatorBase
            DerivedSuccess(Result, E);
   }
 
+  bool VisitMatrixElementExpr(const MatrixElementExpr *E) { return false; }
+
   bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E) {
     APValue Val;
     if (!Evaluate(Val, Info, E->getBase()))
@@ -9337,6 +9339,7 @@ class LValueExprEvaluator
   bool VisitCXXUuidofExpr(const CXXUuidofExpr *E);
   bool VisitArraySubscriptExpr(const ArraySubscriptExpr *E);
   bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E);
+  bool VisitMatrixElementExpr(const MatrixElementExpr *E);
   bool VisitUnaryDeref(const UnaryOperator *E);
   bool VisitUnaryReal(const UnaryOperator *E);
   bool VisitUnaryImag(const UnaryOperator *E);
@@ -9698,6 +9701,10 @@ bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) {
   return LValueExprEvaluatorBaseTy::VisitMemberExpr(E);
 }
 
+bool LValueExprEvaluator::VisitMatrixElementExpr(const MatrixElementExpr *E) {
+  return false;
+}
+
 bool LValueExprEvaluator::VisitExtVectorElementExpr(
     const ExtVectorElementExpr *E) {
   bool Success = true;
@@ -21025,6 +21032,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
   case Expr::CompoundAssignOperatorClass:
   case Expr::CompoundLiteralExprClass:
   case Expr::ExtVectorElementExprClass:
+  case Expr::MatrixElementExprClass:
   case Expr::DesignatedInitExprClass:
   case Expr::ArrayInitLoopExprClass:
   case Expr::ArrayInitIndexExprClass:
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index fa28c0d444cc4..1310b32bf9fe3 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -4956,6 +4956,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
   case Expr::ChooseExprClass:
   case Expr::CompoundLiteralExprClass:
   case Expr::ExtVectorElementExprClass:
+  case Expr::MatrixElementExprClass:
   case Expr::GenericSelectionExprClass:
   case Expr::ObjCEncodeExprClass:
   case Expr::ObjCIsaExprClass:
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 4d1ad387b8e8d..354d720f2882c 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1826,6 +1826,12 @@ void StmtPrinter::VisitExtVectorElementExpr(ExtVectorElementExpr *Node) {
   OS << Node->getAccessor().getName();
 }
 
+void StmtPrinter::VisitMatrixElementExpr(MatrixElementExpr *Node) {
+  PrintExpr(Node->getBase());
+  OS << ".";
+  OS << Node->getAccessor().getName();
+}
+
 void StmtPrinter::VisitCStyleCastExpr(CStyleCastExpr *Node) {
   OS << '(';
   Node->getTypeAsWritten().print(OS, Policy);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index efabe9809c361..4922e4d65162f 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1680,6 +1680,11 @@ void StmtProfiler::VisitExtVectorElementExpr(const ExtVectorElementExpr *S) {
   VisitName(&S->getAccessor());
 }
 
+void StmtProfiler::VisitMatrixElementExpr(const MatrixElementExpr *S) {
+  VisitExpr(S);
+  VisitName(&S->getAccessor());
+}
+
 void StmtProfiler::VisitBlockExpr(const BlockExpr *S) {
   VisitExpr(S);
   VisitDecl(S->getBlockDecl());
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 7bc0404db1bee..aebfb9fa53fa1 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -1675,6 +1675,10 @@ void TextNodeDumper::VisitExtVectorElementExpr(
   OS << " " << Node->getAccessor().getNameStart();
 }
 
+void TextNodeDumper::VisitMatrixElementExpr(const MatrixElementExpr *Node) {
+  OS << " " << Node->getAccessor().getNameStart();
+}
+
 void TextNodeDumper::VisitBinaryOperator(const BinaryOperator *Node) {
   OS << " '" << BinaryOperator::getOpcodeStr(Node->getOpcode()) << "'";
   if (Node->hasStoredFPFeatures())
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 4ce38f4e0a1f9..5118c3f3bca8e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -400,6 +400,8 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
 
   mlir::Value VisitExtVectorElementExpr(Expr *e) { return emitLoadOfLValue(e); }
 
+  mlir::Value VisitMatrixElementExpr(Expr *e) { return emitLoadOfLValue(e); }
+
   mlir::Value VisitMemberExpr(MemberExpr *e);
 
   mlir::Value VisitCompoundLiteralExpr(CompoundLiteralExpr *e) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index ac66d00950f05..70e85ad02998f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -1001,6 +1001,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
     return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
   case Expr::ExtVectorElementExprClass:
     return emitExtVectorElementExpr(cast<ExtVectorElementExpr>(e));
+  case Expr::MatrixElementExprClass:
+    return emitMatrixElementExpr(cast<MatrixElementExpr>(e));
   case Expr::UnaryOperatorClass:
     return emitUnaryOpLValue(cast<UnaryOperator>(e));
   case Expr::StringLiteralClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index adcf4d56e3892..3a6663acbbf47 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1357,6 +1357,8 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   LValue emitExtVectorElementExpr(const ExtVectorElementExpr *e);
 
+  LValue emitMatrixElementExpr(const MatrixElementExpr *e);
+
   Address emitArrayToPointerDecay(const Expr *e,
                                   LValueBaseInfo *baseInfo = nullptr);
 
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 2a5ae8da72512..a826d8cdf934c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -29,6 +29,7 @@
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/InferAlloc.h"
 #include "clang/AST/NSAPI.h"
 #include "clang/AST/ParentMapContext.h"
@@ -1827,6 +1828,8 @@ LValue CodeGenFunction::EmitLValueHelper(const Expr *E,
     return EmitArraySectionExpr(cast<ArraySectionExpr>(E));
   case Expr::ExtVectorElementExprClass:
     return EmitExtVectorElementExpr(cast<ExtVectorElementExpr>(E));
+  case Expr::MatrixElementExprClass:
+    return EmitMatrixElementExpr(cast<MatrixElementExpr>(E));
   case Expr::CXXThisExprClass:
     return MakeAddrLValue(LoadCXXThisAddress(), E->getType());
   case Expr::MemberExprClass:
@@ -2295,6 +2298,50 @@ static RawAddress MaybeConvertMatrixAddress(RawAddress Addr,
   return Addr;
 }
 
+LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) {
+  // Emit the base as an lvalue; a prvalue matrix is spilled to a temporary.
+  LValue Base;
+  if (E->getBase()->isGLValue())
+    Base = EmitLValue(E->getBase());
+  else {
+    assert(E->getBase()->getType()->isConstantMatrixType() &&
+           "Result must be a Constant Matrix");
+    llvm::Value *Mat = EmitScalarExpr(E->getBase());
+    Address MatMem = CreateMemTemp(E->getBase()->getType());
+    QualType Ty = E->getBase()->getType();
+    llvm::Type *LTy = convertTypeForLoadStore(Ty, Mat->getType());
+    if (LTy->getScalarSizeInBits() > Mat->getType()->getScalarSizeInBits())
+      Mat = Builder.CreateZExt(Mat, LTy);
+    Builder.CreateStore(Mat, MatMem);
+    Base = MakeAddrLValue(MatMem, Ty, AlignmentSource::Decl);
+  }
+  QualType type =
+      E->getType().withCVRQualifiers(Base.getQuals().getCVRQualifiers());
+
+  // Encode the element access list into a vector of unsigned indices.
+  SmallVector<uint32_t, 4> Indices;
+  E->getEncodedElementAccess(Indices);
+
+  if (Base.isSimple()) {
+    llvm::Constant *CV =
+        llvm::ConstantDataVector::get(getLLVMContext(), Indices);
+    return LValue::MakeExtVectorElt(
+        MaybeConvertMatrixAddress(Base.getAddress(), *this), CV, type,
+        Base.getBaseInfo(), TBAAAccessInfo());
+  }
+  assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!");
+
+  llvm::Constant *BaseElts = Base.getExtVectorElts();
+  SmallVector<llvm::Constant *, 4> CElts;
+
+  for (unsigned Index : Indices)
+    CElts.push_back(BaseElts->getAggregateElement(Index));
+  llvm::Constant *CV = llvm::ConstantVector::get(CElts);
+  return LValue::MakeExtVectorElt(
+      MaybeConvertMatrixAddress(Base.getExtVectorAddress(), *this), CV, type,
+      Base.getBaseInfo(), TBAAAccessInfo());
+}
+
 // Emit a store of a matrix LValue. This may require casting the original
 // pointer to memory address (ArrayType) to a pointer to the value type
 // (VectorType).
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 35e2c65a8e112..32a0f4db88971 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -608,6 +608,7 @@ class ScalarExprEmitter
   Value *VisitConvertVectorExpr(ConvertVectorExpr *E);
   Value *VisitMemberExpr(MemberExpr *E);
   Value *VisitExtVectorElementExpr(Expr *E) { return EmitLoadOfLValue(E); }
+  Value *VisitMatrixElementExpr(Expr *E) { return EmitLoadOfLValue(E); }
   Value *VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
     // Strictly speaking, we shouldn't be calling EmitLoadOfLValue, which
     // transitively calls EmitCompoundLiteralLValue, here in C++ since compound
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 1073de1d25ec7..e28579d8f0a06 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4438,6 +4438,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   LValue EmitArraySectionExpr(const ArraySectionExpr *E,
                               bool IsLowerBound = true);
   LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E);
+  LValue EmitMatrixElementExpr(const MatrixElementExpr *E);
   LValue EmitMemberExpr(const MemberExpr *E);
   LValue EmitObjCIsaExpr(const ObjCIsaExpr *E);
   LValue EmitCompoundLiteralLValue(const CompoundLiteralExpr *E);
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 6208d6679df73..31f36700aedff 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1276,6 +1276,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Expr::DesignatedInitUpdateExprClass:
   case Expr::ExprWithCleanupsClass:
   case Expr::ExtVectorElementExprClass:
+  case Expr::MatrixElementExprClass:
   case Expr::InitListExprClass:
   case Expr::ArrayInitLoopExprClass:
   case Expr::MemberExprClass:
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5795a71b5cae8..56c59e68cfe6a 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14137,6 +14137,9 @@ static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) {
   case Expr::MLV_DuplicateVectorComponents:
     DiagID = diag::err_typecheck_duplicate_vector_components_not_mlvalue;
     break;
+  case Expr::MLV_DuplicateMatrixComponents:
+    DiagID = diag::err_typecheck_duplicate_matrix_components_not_mlvalue;
+    break;
   case Expr::MLV_NoSetterProperty:
     llvm_unreachable("readonly properties should be processed differently");
   case Expr::MLV_InvalidMessageExpression:
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index aedfc5e88b1c6..80d0f4945db67 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -14,11 +14,13 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
+#include "clang/AST/TypeBase.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Overload.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenMP.h"
 
@@ -1617,6 +1619,21 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R,
         ExtVectorElementExpr(ret, VK, BaseExpr.get(), *Member, MemberLoc);
   }
 
+  if (S.getLangOpts().HLSL && BaseType->isConstantMatrixType()) {
+    IdentifierInfo *Member = MemberName.getAsIdentifierInfo();
+    ExprValueKind VK = BaseExpr.get()->getValueKind();
+    QualType Ret = S.HLSL().CheckMatrixComponent(S, BaseType, VK, OpLoc, Member,
+                                                 MemberLoc);
+    if (Ret.isNull())
+      return ExprError();
+    Qualifiers BaseQ =
+        S.Context.getCanonicalType(BaseExpr.get()->getType()).getQualifiers();
+    Ret = S.Context.getQualifiedType(Ret, BaseQ);
+
+    return new (S.Context)
+        MatrixElementExpr(Ret, VK, BaseExpr.get(), *Member, MemberLoc);
+  }
+
   // Adjust builtin-sel to the appropriate redefinition type if that's
   // not just a pointer to builtin-sel again.
   if (IsArrow && BaseType->isSpecificBuiltinType(BuiltinType::ObjCSel) &&
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index f15b274a65a53..347df16cbd2a0 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4796,6 +4796,205 @@ bool SemaHLSL::transformInitList(const InitializedEntity &Entity,
   return true;
 }
 
+/// Validates an HLSL matrix accessor/swizzle such as `_m00_m11` (zero-based)
+/// or `_11_22` (one-based) and returns the resulting vector type, or a null
+/// QualType after diagnosing. Repeated components demote \p VK to a prvalue.
+QualType SemaHLSL::CheckMatrixComponent(Sema &S, QualType baseType,
+                                        ExprValueKind &VK, SourceLocation OpLoc,
+                                        const IdentifierInfo *CompName,
+                                        SourceLocation CompLoc) {
+  const auto *MT = baseType->getAs<ConstantMatrixType>();
+  StringRef AccessorName = CompName->getName();
+  assert(MT &&
+         "CheckMatrixComponent is intended to be used on ConstantMatrixType");
+  assert(!AccessorName.empty() && "Matrix Accessor must have a name");
+
+  unsigned Rows = MT->getNumRows();
+  unsigned Cols = MT->getNumColumns();
+  bool IsZeroBasedAccessor = false;
+  unsigned ChunkLen = 0;
+  if (AccessorName.size() < 2) {
+    const char Expected[] = "length 4 for zero based: \'_mRC\' or length 3 for "
+                            "one-based: \'_RC\' accessor";
+    S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
+        << CompName->getName() << StringRef(Expected, sizeof(Expected) - 1)
+        << SourceRange(CompLoc);
+    return QualType();
+  }
+  if (AccessorName[0] == '_' && AccessorName[1] == 'm') {
+    IsZeroBasedAccessor = true; // zero-based: 00..33
+    ChunkLen = 4;               // zero-based: "_mRC"
+  } else if (AccessorName[0] == '_')
+    // one-based: 11..44
+    ChunkLen = 3; // one-based: "_RC"
+  else {
+    const char Expected[] =
+        "zero based: \'_mRC\' or one-based: \'_RC\' accessor";
+    S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
+        << CompName->getName() << StringRef(Expected, sizeof(Expected) - 1)
+        << SourceRange(CompLoc);
+    return QualType();
+  }
+
+  // Reject a partial trailing chunk (or a name shorter than one chunk) up
+  // front so the loop below never reads row/column chars past the name's end.
+  if (AccessorName.size() % ChunkLen != 0) {
+    StringRef Expected = IsZeroBasedAccessor
+                             ? "zero based: \'_mRC\' accessor"
+                             : "one-based: \'_RC\' accessor";
+    S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
+        << CompName->getName() << Expected << SourceRange(CompLoc);
+    return QualType();
+  }
+
+  auto isDigit = [](char c) { return c >= '0' && c <= '9'; };
+  auto isZeroBasedIndex = [](int i) { return i >= 0 && i <= 3; };
+  auto isOneBasedIndex = [](int i) { return i >= 1 && i <= 4; };
+
+  bool HasRepeated = false;
+  SmallVector<bool, 16> Seen(Rows * Cols, false);
+  unsigned NumComponents = 0;
+  const char *Begin = AccessorName.data();
+
+  for (unsigned I = 0, E = AccessorName.size(); I < E; I += ChunkLen) {
+    const char *Chunk = Begin + I;
+    char RowChar = 0, ColChar = 0;
+    if (IsZeroBasedAccessor) {
+      // Zero-based: "_mRC"
+      if (Chunk[0] != '_' || Chunk[1] != 'm') {
+        // Select by position: comparing chars misplaces the caret for "__".
+        unsigned BadPos = (Chunk[0] != '_') ? 0 : 1;
+        char Bad = Chunk[BadPos];
+        const char Expected[] = "\'_m\' prefix";
+        S.Diag(OpLoc.getLocWithOffset(I + BadPos + 1),
+               diag::err_builtin_matrix_invalid_member)
+            << StringRef(&Bad, 1) << StringRef(Expected, sizeof(Expected) - 1)
+            << SourceRange(CompLoc);
+        return QualType();
+      }
+      RowChar = Chunk[2];
+      ColChar = Chunk[3];
+    } else {
+      // One-based: "_RC"
+      if (Chunk[0] != '_') {
+        const char Expected[] = "\'_\' prefix";
+        S.Diag(OpLoc.getLocWithOffset(I + 1),
+               diag::err_builtin_matrix_invalid_member)
+            << StringRef(&Chunk[0], 1)
+            << StringRef(Expected, sizeof(Expected) - 1)
+            << SourceRange(CompLoc);
+        return QualType();
+      }
+      RowChar = Chunk[1];
+      ColChar = Chunk[2];
+    }
+
+    // Must be digits.
+    bool isDigitsError = false;
+    if (!isDigit(RowChar)) {
+      const char Expected[] = "row as integer";
+      unsigned BadPos = IsZeroBasedAccessor ? 2 : 1;
+      S.Diag(OpLoc.getLocWithOffset(I + BadPos + 1),
+             diag::err_builtin_matrix_invalid_member)
+          << StringRef(&RowChar, 1) << StringRef(Expected, sizeof(Expected) - 1)
+          << SourceRange(CompLoc);
+      isDigitsError = true;
+    }
+
+    if (!isDigit(ColChar)) {
+      const char Expected[] = "column as integer";
+      unsigned BadPos = IsZeroBasedAccessor ? 3 : 2;
+      S.Diag(OpLoc.getLocWithOffset(I + BadPos + 1),
+             diag::err_builtin_matrix_invalid_member)
+          << StringRef(&ColChar, 1) << StringRef(Expected, sizeof(Expected) - 1)
+          << SourceRange(CompLoc);
+      isDigitsError = true;
+    }
+    if (isDigitsError)
+      return QualType();
+
+    unsigned Row = RowChar - '0';
+    unsigned Col = ColChar - '0';
+
+    bool HasIndexingError = false;
+    if (IsZeroBasedAccessor) {
+      // 0-based [0..3]
+      if (!isZeroBasedIndex(Row)) {
+        S.Diag(OpLoc, diag::err_hlsl_matrix_element_not_in_bounds)
+            << /*row*/ 0 << /*zero-based*/ 0 << SourceRange(CompLoc);
+        HasIndexingError = true;
+      }
+      if (!isZeroBasedIndex(Col)) {
+        S.Diag(OpLoc, diag::err_hlsl_matrix_element_not_in_bounds)
+            << /*col*/ 1 << /*zero-based*/ 0 << SourceRange(CompLoc);
+        HasIndexingError = true;
+      }
+    } else {
+      // 1-based [1..4]
+      if (!isOneBasedIndex(Row)) {
+        S.Diag(OpLoc, diag::err_hlsl_matrix_element_not_in_bounds)
+            << /*row*/ 0 << /*one-based*/ 1 << SourceRange(CompLoc);
+        HasIndexingError = true;
+      }
+      if (!isOneBasedIndex(Col)) {
+        S.Diag(OpLoc, diag::err_hlsl_matrix_element_not_in_bounds)
+            << /*col*/ 1 << /*one-based*/ 1 << SourceRange(CompLoc);
+        HasIndexingError = true;
+      }
+      // Convert to 0-based after range checking.
+      Row--;
+      Col--;
+    }
+
+    if (HasIndexingError)
+      return QualType();
+
+    // Note: matrix swizzle index is hard coded. That means Row and Col can
+    // potentially be larger than Rows and Cols if matrix size is less than
+    // the max index size.
+    bool HasBoundsError = false;
+    if (Row >= Rows) {
+      Diag(OpLoc, diag::err_hlsl_matrix_index_out_of_bounds)
+          << /*Row*/ 0 << Row << Rows << SourceRange(CompLoc);
+      HasBoundsError = true;
+    }
+    if (Col >= Cols) {
+      Diag(OpLoc, diag::err_hlsl_matrix_index_out_of_bounds)
+          << /*Col*/ 1 << Col << Cols << SourceRange(CompLoc);
+      HasBoundsError = true;
+    }
+    if (HasBoundsError)
+      return QualType();
+
+    unsigned FlatIndex = Row * Cols + Col;
+    if (Seen[FlatIndex])
+      HasRepeated = true;
+    Seen[FlatIndex] = true;
+    ++NumComponents;
+  }
+  if (NumComponents == 0 || NumComponents > 4) {
+    S.Diag(OpLoc, diag::err_hlsl_matrix_swizzle_invalid_length)
+        << NumComponents << SourceRange(CompLoc);
+    return QualType();
+  }
+
+  QualType ElemTy = MT->getElementType();
+  QualType VT = S.Context.getExtVectorType(ElemTy, NumComponents);
+  if (HasRepeated)
+    VK = VK_PRValue;
+
+  for (Sema::ExtVectorDeclsType::iterator
+           I = S.ExtVectorDecls.begin(S.getExternalSource()),
+           E = S.ExtVectorDecls.end();
+       I != E; ++I) {
+    if ((*I)->getUnderlyingType() == VT)
+      return S.Context.getTypedefType(ElaboratedTypeKeyword::None,
+                                      /*Qualifier=*/std::nullopt, *I);
+  }
+
+  return VT;
+}
+
 bool SemaHLSL::handleInitialization(VarDecl *VDecl, Expr *&Init) {
   const HLSLVkConstantIdAttr *ConstIdAttr =
       VDecl->getAttr<HLSLVkConstantIdAttr>();
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index ce4318a71ee7a..b1ce75f634823 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -3098,14 +3098,15 @@ class TreeTransform {
                                               Init);
   }
 
-  /// Build a new extended vector element access expression.
+  /// Build a new extended vector or matrix element access expression.
   ///
   /// By default, performs semantic analysis to build the new expression.
   /// Subclasses may override this routine to provide different behavior.
-  ExprResult RebuildExtVectorElementExpr(Expr *Base, SourceLocation OpLoc,
-                                         bool IsArrow,
-                                         SourceLocation AccessorLoc,
-                                         IdentifierInfo &Accessor) {
+  ExprResult RebuildExtVectorOrMatrixElementExpr(Expr *Base,
+                                                 SourceLocation OpLoc,
+                                                 bool IsArrow,
+                                                 SourceLocation AccessorLoc,
+                                                 IdentifierInfo &Accessor) {
 
     CXXScopeSpec SS;
     DeclarationNameInfo NameInfo(&Accessor, AccessorLoc);
@@ -13961,11 +13962,29 @@ TreeTransform<Derived>::TransformExtVectorElementExpr(ExtVectorElementExpr *E) {
   // FIXME: Bad source location
   SourceLocation FakeOperatorLoc =
       SemaRef.getLocForEndOfToken(E->getBase()->getEndLoc());
-  return getDerived().RebuildExtVectorElementExpr(
+  return getDerived().RebuildExtVectorOrMatrixElementExpr(
       Base.get(), FakeOperatorLoc, E->isArrow(), E->getAccessorLoc(),
       E->getAccessor());
 }
 
+template <typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformMatrixElementExpr(MatrixElementExpr *E) {
+  ExprResult Base = getDerived().TransformExpr(E->getBase());
+  if (Base.isInvalid())
+    return ExprError();
+
+  if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase())
+    return E;
+
+  // FIXME: Bad source location
+  SourceLocation FakeOperatorLoc =
+      SemaRef.getLocForEndOfToken(E->getBase()->getEndLoc());
+  return getDerived().RebuildExtVectorOrMatrixElementExpr(
+      Base.get(), FakeOperatorLoc, /*IsArrow=*/false, E->getAccessorLoc(),
+      E->getAccessor());
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformInitListExpr(InitListExpr *E) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 5553139dfaa46..655c05ce4bf69 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1231,6 +1231,13 @@ void ASTStmtReader::VisitExtVectorElementExpr(ExtVectorElementExpr *E) {
   E->setAccessorLoc(readSourceLocation());
 }
 
+void ASTStmtReader::VisitMatrixElementExpr(MatrixElementExpr *E) {
+  VisitExpr(E);
+  E->setBase(Record.readSubExpr());
+  E->setAccessor(Record.readIdentifier());
+  E->setAccessorLoc(readSourceLocation());
+}
+
 void ASTStmtReader::VisitInitListExpr(InitListExpr *E) {
   VisitExpr(E);
   if (auto *SyntForm = cast_or_null<InitListExpr>(Record.readSubStmt()))
@@ -3377,6 +3384,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = new (Context) ExtVectorElementExpr(Empty);
       break;
 
+    case EXPR_MATRIX_ELEMENT:
+      S = new (Context) MatrixElementExpr(Empty);
+      break;
+
     case EXPR_INIT_LIST:
       S = new (Context) InitListExpr(Empty);
       break;
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 8f22156f93487..87b006b2c4d7f 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1200,6 +1200,14 @@ void ASTStmtWriter::VisitExtVectorElementExpr(ExtVectorElementExpr *E) {
   Code = serialization::EXPR_EXT_VECTOR_ELEMENT;
 }
 
+void ASTStmtWriter::VisitMatrixElementExpr(MatrixElementExpr *E) {
+  VisitExpr(E);
+  Record.AddStmt(E->getBase());
+  Record.AddIdentifierRef(&E->getAccessor());
+  Record.AddSourceLocation(E->getAccessorLoc());
+  Code = serialization::EXPR_MATRIX_ELEMENT;
+}
+
 void ASTStmtWriter::VisitInitListExpr(InitListExpr *E) {
   VisitExpr(E);
   // NOTE: only add the (possibly null) syntactic form.
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index a6a96b594fe85..2c80940e706dc 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1930,6 +1930,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::DesignatedInitUpdateExprClass:
     case Stmt::ArrayInitIndexExprClass:
     case Stmt::ExtVectorElementExprClass:
+    case Stmt::MatrixElementExprClass:
     case Stmt::ImaginaryLiteralClass:
     case Stmt::ObjCAtCatchStmtClass:
     case Stmt::ObjCAtFinallyStmtClass:
diff --git a/clang/test/AST/HLSL/matrix-member-access-scalar.hlsl b/clang/test/AST/HLSL/matrix-member-access-scalar.hlsl
new file mode 100644
index 0000000000000..b403d27b29760
--- /dev/null
+++ b/clang/test/AST/HLSL/matrix-member-access-scalar.hlsl
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s 
+
+typedef float float3x3 __attribute__((matrix_type(3,3)));
+
+[numthreads(1,1,1)]
+void ok() {
+    float3x3 A;
+
+   // CHECK:      BinaryOperator {{.*}} 'vector<float, 1>' lvalue '='
+   // CHECK-NEXT: MatrixElementExpr {{.*}} 'vector<float, 1>' lvalue _m12
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <VectorSplat>
+   // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 3.140000e+00
+    A._m12 = 3.14;
+
+   // CHECK: VarDecl {{.*}} r 'float' cinit
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <HLSLVectorTruncation>
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <LValueToRValue>
+   // CHECK-NEXT: MatrixElementExpr {{.*}}  'vector<float, 1>' lvalue _m00
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+    float r = A._m00;
+
+   // CHECK: VarDecl {{.*}} good1 'float' cinit
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <HLSLVectorTruncation>
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <LValueToRValue>
+   // CHECK-NEXT: MatrixElementExpr {{.*}}  'vector<float, 1>' lvalue _11
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+    float good1 = A._11;
+
+   // CHECK:      BinaryOperator {{.*}} 'vector<float, 1>' lvalue '='
+   // CHECK-NEXT: MatrixElementExpr {{.*}} 'vector<float, 1>' lvalue _33
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <VectorSplat>
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <LValueToRValue>
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue Var {{.*}} 'R' 'float'
+    float R;
+    A._33 = R;
+}
diff --git a/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-dump-json.hlsl b/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-dump-json.hlsl
new file mode 100644
index 0000000000000..afb144624696a
--- /dev/null
+++ b/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-dump-json.hlsl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl \
+// RUN:   -ast-dump=json -disable-llvm-passes -o - -hlsl-entry main %s \
+// RUN:   | FileCheck %s
+
+typedef float float4x4 __attribute__((matrix_type(4,4)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+
+float4x4 gMat;
+
+[numthreads(1, 1, 1)]
+void main() {
+  float4x4 A = gMat;
+
+  // one-based swizzle
+  float3 v1 = A._11_22_33;
+
+  // zero-based swizzle
+  float2 v2 = A._m00_m11;
+}
+
+// CHECK: "kind": "MatrixElementExpr"
+// CHECK-NEXT: "range": {
+// CHECK: "kind": "MatrixElementExpr"
+// CHECK-NEXT: "range": {
diff --git a/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl b/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl
new file mode 100644
index 0000000000000..fa265987bd000
--- /dev/null
+++ b/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl \
+// RUN:   -ast-print -disable-llvm-passes -o - -hlsl-entry main %s \
+// RUN:   | FileCheck %s
+
+typedef float float4x4 __attribute__((matrix_type(4,4)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+
+float4x4 gMat;
+
+[numthreads(1, 1, 1)]
+void main() {
+  float4x4 A = gMat;
+  float3 v1 = A._11_22_33;
+  float2 v2 = A._m00_m11;
+}
+
+// CHECK: float4x4 gMat;
+// CHECK: float4x4 A = gMat;
+// CHECK: float3 v1 = A._11_22_33;
+// CHECK: float2 v2 = A._m00_m11;
diff --git a/clang/test/AST/HLSL/matrix-member-access-swizzle.hlsl b/clang/test/AST/HLSL/matrix-member-access-swizzle.hlsl
new file mode 100644
index 0000000000000..2d0169e799802
--- /dev/null
+++ b/clang/test/AST/HLSL/matrix-member-access-swizzle.hlsl
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s 
+
+typedef float float3x3 __attribute__((matrix_type(3,3)));
+typedef float float4x4 __attribute__((matrix_type(4,4)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+
+[numthreads(1,1,1)]
+void ok() {
+    float3x3 A;
+
+   // CHECK:      BinaryOperator {{.*}} 'float2':'vector<float, 2>' lvalue '='
+   // CHECK-NEXT: MatrixElementExpr {{.*}} 'float2':'vector<float, 2>' lvalue _m12_m21
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+   // CHECK-NEXT: ExtVectorElementExpr {{.*}} 'float2':'vector<float, 2>' xx
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 1>' <VectorSplat>
+   // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 3.140000e+00
+    A._m12_m21 = 3.14.xx;
+
+   // CHECK: VarDecl {{.*}}r 'float2':'vector<float, 2>' cinit
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector<float, 2>' <LValueToRValue>
+   // CHECK-NEXT: MatrixElementExpr {{.*}}  'float2':'vector<float, 2>' lvalue _m00_m11
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+    float2 r = A._m00_m11;
+
+   // CHECK: VarDecl {{.*}} good1 'float3':'vector<float, 3>' cinit
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float3':'vector<float, 3>' <LValueToRValue>
+   // CHECK-NEXT: MatrixElementExpr {{.*}}  'float3':'vector<float, 3>' lvalue _11_22_33
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+    float3 good1 = A._11_22_33;
+
+   // CHECK:      BinaryOperator {{.*}} 'float4':'vector<float, 4>' lvalue '='
+   // CHECK-NEXT: MatrixElementExpr {{.*}} 'float4':'vector<float, 4>' lvalue _11_22_33_44
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float4x4':'matrix<float, 4, 4>' lvalue Var {{.*}} 'B' 'float4x4':'matrix<float, 4, 4>'
+   // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector<float, 4>' <LValueToRValue>
+   // CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector<float, 4>' lvalue Var {{.*}} 'R' 'float4':'vector<float, 4>'
+    float4 R;
+    float4x4 B;
+    B._11_22_33_44 = R;
+
+    // CHECK: BinaryOperator {{.*}} 'float3':'vector<float, 3>' lvalue '='
+    // CHECK-NEXT: MatrixElementExpr {{.*}} 'float3':'vector<float, 3>' lvalue _11_22_33
+    // CHECK-NEXT: DeclRefExpr{{.*}} 'float3x3':'matrix<float, 3, 3>' lvalue Var {{.*}} 'A' 'float3x3':'matrix<float, 3, 3>'
+    // CHECK-NEXT: ImplicitCastExpr {{.*}}'float3':'vector<float, 3>' <LValueToRValue>
+    // CHECK-NEXT: ExtVectorElementExpr {{.*}} 'float3':'vector<float, 3>' lvalue vectorcomponent rgb
+    // CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector<float, 4>' lvalue Var {{.*}} 'R' 'float4':'vector<float, 4>'
+    A._11_22_33 = R.rgb;
+}
diff --git a/clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl b/clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl
new file mode 100644
index 0000000000000..605a119e3beac
--- /dev/null
+++ b/clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -finclude-default-header -emit-pch -o %t %S/Inputs/pch.hlsl
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -finclude-default-header -include-pch %t -ast-dump-all %s | FileCheck  %s
+
+float4x4 gM;
+
+// CHECK: FunctionDecl {{.*}} getDiag 'float4 ()'
+// CHECK-NEXT: CompoundStmt {{.*}}
+// CHECK-NEXT: ReturnStmt {{.*}}
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector<float, 4>' <LValueToRValue>
+// CHECK-NEXT: MatrixElementExpr {{.*}} 'vector<float hlsl_constant, 4>' lvalue _11_22_33_44
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl_constant float4x4':'matrix<float hlsl_constant, 4, 4>' lvalue Var {{.*}}  'gM' 'hlsl_constant float4x4':'matrix<float hlsl_constant, 4, 4>'
+float4 getDiag() {  
+  return gM._11_22_33_44;
+}
+
+// CHECK: FunctionDecl {{.*}} setRowZero 'void (float4)'
+// CHECK-NEXT: ParmVarDecl {{.*}} used V 'float4':'vector<float, 4>'
+// CHECK-NEXT: CompoundStmt {{.*}}
+// CHECK-NEXT: BinaryOperator {{.*}} 'vector<float hlsl_constant, 4>' lvalue '='
+// CHECK-NEXT: MatrixElementExpr {{.*}} 'vector<float hlsl_constant, 4>' lvalue _m00_m01_m02_m03
+// CHECK-NEXT: DeclRefExpr {{.*}} 'hlsl_constant float4x4':'matrix<float hlsl_constant, 4, 4>' lvalue Var {{.*}} 'gM' 'hlsl_constant float4x4':'matrix<float hlsl_constant, 4, 4>'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector<float, 4>' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector<float, 4>' lvalue ParmVar {{.*}} 'V' 'float4':'vector<float, 4>'
+void setRowZero(float4 V) {  
+  gM._m00_m01_m02_m03 = V;
+}
\ No newline at end of file
diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
new file mode 100644
index 0000000000000..1d3d4d17e0c8a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
@@ -0,0 +1,230 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return11u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._11 (one-based row 1, column 1); the checks expect shuffle index 0.
+int Return11(int4x4 A) {
+    return A._11;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return12u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._12 (one-based row 1, column 2); the checks expect shuffle index 1.
+int Return12(int4x4 A) {
+    return A._12;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return13u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 2>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._13 (one-based row 1, column 3); the checks expect shuffle index 2.
+int Return13(int4x4 A) {
+    return A._13;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return14u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 3>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._14 (one-based row 1, column 4); the checks expect shuffle index 3.
+int Return14(int4x4 A) {
+    return A._14;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return21u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 4>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._21 (one-based row 2, column 1); the checks expect shuffle index 4.
+int Return21(int4x4 A) {
+    return A._21;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return22u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 5>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._22 (one-based row 2, column 2); the checks expect shuffle index 5.
+int Return22(int4x4 A) {
+    return A._22;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return23u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 6>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._23 (one-based row 2, column 3); the checks expect shuffle index 6.
+int Return23(int4x4 A) {
+    return A._23;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return24u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 7>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._24 (one-based row 2, column 4); the checks expect shuffle index 7.
+int Return24(int4x4 A) {
+    return A._24;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return31u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 8>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._31 (one-based row 3, column 1); the checks expect shuffle index 8.
+int Return31(int4x4 A) {
+    return A._31;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return32u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 9>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._32 (one-based row 3, column 2); the checks expect shuffle index 9.
+int Return32(int4x4 A) {
+    return A._32;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return33u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 10>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._33 (one-based row 3, column 3); the checks expect shuffle index 10.
+int Return33(int4x4 A) {
+    return A._33;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return34u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 11>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._34 (one-based row 3, column 4); the checks expect shuffle index 11.
+int Return34(int4x4 A) {
+    return A._34;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return41u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 12>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._41 (one-based row 4, column 1); the checks expect shuffle index 12.
+int Return41(int4x4 A) {
+    return A._41;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return42u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 13>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._42 (one-based row 4, column 2); the checks expect shuffle index 13.
+int Return42(int4x4 A) {
+    return A._42;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return43u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 14>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._43 (one-based row 4, column 3); the checks expect shuffle index 14.
+int Return43(int4x4 A) {
+    return A._43;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z8Return44u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <1 x i32> <i32 15>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Reads A._44 (one-based row 4, column 4); the checks expect shuffle index 15.
+int Return44(int4x4 A) {
+    return A._44;
+}
+
diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
new file mode 100644
index 0000000000000..5f80f924b024f
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
@@ -0,0 +1,345 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat11Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._11 (one-based row 1, column 1); the checks expect a store
+// straight through A's pointer with no GEP. Returns the assignment result.
+int StoreScalarAtMat11(out int4x4 A, int I) {
+    return A._11 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat12Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._12 (one-based row 1, column 2); checks expect a GEP to element 1.
+int StoreScalarAtMat12(out int4x4 A, int I) {
+    return A._12 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat13Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._13 (one-based row 1, column 3); checks expect a GEP to element 2.
+int StoreScalarAtMat13(out int4x4 A, int I) {
+    return A._13 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat14Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 3
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._14 (one-based row 1, column 4); checks expect a GEP to element 3.
+int StoreScalarAtMat14(out int4x4 A, int I) {
+    return A._14 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat21Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 4
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._21 (one-based row 2, column 1); checks expect a GEP to element 4.
+int StoreScalarAtMat21(out int4x4 A, int I) {
+    return A._21 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat22Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 5
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._22 (one-based row 2, column 2); checks expect a GEP to element 5.
+int StoreScalarAtMat22(out int4x4 A, int I) {
+    return A._22 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat23Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._23 (one-based row 2, column 3); checks expect a GEP to element 6.
+int StoreScalarAtMat23(out int4x4 A, int I) {
+    return A._23 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat24Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._24 (one-based row 2, column 4); checks expect a GEP to element 7.
+int StoreScalarAtMat24(out int4x4 A, int I) {
+    return A._24 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat31Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 8
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._31 (one-based row 3, column 1); checks expect a GEP to element 8.
+int StoreScalarAtMat31(out int4x4 A, int I) {
+    return A._31 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat32Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 9
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._32 (one-based row 3, column 2); checks expect a GEP to element 9.
+int StoreScalarAtMat32(out int4x4 A, int I) {
+    return A._32 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat33Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 10
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._33 (one-based row 3, column 3); checks expect a GEP to element 10.
+int StoreScalarAtMat33(out int4x4 A, int I) {
+    return A._33 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat34Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 11
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._34 (one-based row 3, column 4); checks expect a GEP to element 11.
+int StoreScalarAtMat34(out int4x4 A, int I) {
+    return A._34 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat41Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 12
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._41 (one-based row 4, column 1); checks expect a GEP to element 12.
+int StoreScalarAtMat41(out int4x4 A, int I) {
+    return A._41 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat42Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 13
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._42 (one-based row 4, column 2); checks expect a GEP to element 13.
+int StoreScalarAtMat42(out int4x4 A, int I) {
+    return A._42 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat43Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 14
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._43 (one-based row 4, column 3); checks expect a GEP to element 14.
+int StoreScalarAtMat43(out int4x4 A, int I) {
+    return A._43 = I;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat44Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 15
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// Writes I to A._44 (one-based row 4, column 4); checks expect a GEP to element 15.
+int StoreScalarAtMat44(out int4x4 A, int I) {
+    return A._44 = I;
+}
+
+//.
+// CHECK: [[META4]] = !{}
+// CHECK: [[META5]] = !{i64 4}
+//.
diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-load.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-load.hlsl
new file mode 100644
index 0000000000000..31a56811473af
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-load.hlsl
@@ -0,0 +1,108 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z17ReturnOnesSwizzleu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnOnesSwizzle(int4x4 A) {
+    return A._11_12_13_14; // row 1: shuffle mask selects flat elements 0, 1, 2, 3
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z18ReturnOnesSwizzle2u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnOnesSwizzle2(int4x4 A) {
+    return A._11_21_31_41; // column 1: shuffle mask selects flat elements 0, 4, 8, 12
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z17ReturnTwosSwizzleu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnTwosSwizzle(int4x4 A) {
+    return A._21_22_23_24; // row 2: shuffle mask selects flat elements 4, 5, 6, 7
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z18ReturnTwosSwizzle2u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnTwosSwizzle2(int4x4 A) {
+    return A._12_22_32_42; // column 2: shuffle mask selects flat elements 1, 5, 9, 13
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z19ReturnThreesSwizzleu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnThreesSwizzle(int4x4 A) {
+    return A._31_32_33_34; // row 3: shuffle mask selects flat elements 8, 9, 10, 11
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z20ReturnThreesSwizzle2u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnThreesSwizzle2(int4x4 A) {
+    return A._13_23_33_43; // column 3: shuffle mask selects flat elements 2, 6, 10, 14
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z18ReturnFoursSwizzleu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnFoursSwizzle(int4x4 A) {
+    return A._41_42_43_44; // row 4: shuffle mask selects flat elements 12, 13, 14, 15
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z19ReturnFoursSwizzle2u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int4 ReturnFoursSwizzle2(int4x4 A) {
+    return A._14_24_34_44; // column 4: shuffle mask selects flat elements 3, 7, 11, 15
+}
diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-store.hlsl
new file mode 100644
index 0000000000000..ff7fab662a012
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-store.hlsl
@@ -0,0 +1,230 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden void @_Z19OnesSwizzleToScalarRu11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[CAST_SPLAT]], <1 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP2]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 2
+// CHECK-NEXT:    store i32 [[TMP6]], ptr [[TMP7]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 3
+// CHECK-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// CHECK-NEXT:    ret void
+//
+void OnesSwizzleToScalar(out int4x4 A, int I) {
+    A._11_12_13_14 = I.xxxx; // row 1: the 4-lane splat of I is stored to flat elements 0, 1, 2, 3
+}
+
+// CHECK-LABEL: define hidden void @_Z19OnesSwizzleToVectorRu11matrix_typeILm4ELm4EiEDv4_i(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], <4 x i32> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i32> [[V]], ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 8
+// CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP6]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 12
+// CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP8]], align 4
+// CHECK-NEXT:    ret void
+//
+void OnesSwizzleToVector(out int4x4 A, int4 V) {
+    A._11_21_31_41 = V; // column 1: lanes of V are stored to flat elements 0, 4, 8, 12
+}
+
+// CHECK-LABEL: define hidden void @_Z19TwosSwizzleToScalarRu11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[CAST_SPLAT]], <1 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 5
+// CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP6]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 6
+// CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP8]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP9]], ptr [[TMP10]], align 4
+// CHECK-NEXT:    ret void
+//
+void TwosSwizzleToScalar(out int4x4 A, int I) {
+    A._21_22_23_24 = I.xxxx; // row 2: the 4-lane splat of I is stored to flat elements 4, 5, 6, 7
+}
+
+// CHECK-LABEL: define hidden void @_Z19TwosSwizzleToVectorRu11matrix_typeILm4ELm4EiEDv4_i(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], <4 x i32> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i32> [[V]], ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 5
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 9
+// CHECK-NEXT:    store i32 [[TMP6]], ptr [[TMP7]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 13
+// CHECK-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// CHECK-NEXT:    ret void
+//
+void TwosSwizzleToVector(out int4x4 A, int4 V) {
+    A._12_22_32_42 = V; // column 2: lanes of V are stored to flat elements 1, 5, 9, 13
+}
+
+// CHECK-LABEL: define hidden void @_Z21ThreesSwizzleToScalarRu11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[CAST_SPLAT]], <1 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 9
+// CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP6]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 10
+// CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP8]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 11
+// CHECK-NEXT:    store i32 [[TMP9]], ptr [[TMP10]], align 4
+// CHECK-NEXT:    ret void
+//
+void ThreesSwizzleToScalar(out int4x4 A, int I) {
+    A._31_32_33_34 = I.xxxx; // row 3: the 4-lane splat of I is stored to flat elements 8, 9, 10, 11
+}
+
+// CHECK-LABEL: define hidden void @_Z21ThreesSwizzleToVectorRu11matrix_typeILm4ELm4EiEDv4_i(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], <4 x i32> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i32> [[V]], ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 10
+// CHECK-NEXT:    store i32 [[TMP6]], ptr [[TMP7]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 14
+// CHECK-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// CHECK-NEXT:    ret void
+//
+void ThreesSwizzleToVector(out int4x4 A, int4 V) {
+    A._13_23_33_43 = V; // column 3: lanes of V are stored to flat elements 2, 6, 10, 14
+}
+
+// CHECK-LABEL: define hidden void @_Z20FoursSwizzleToScalarRu11matrix_typeILm4ELm4EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[CAST_SPLAT]], <1 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 12
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 13
+// CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP6]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 14
+// CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP8]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <16 x i32>, ptr [[TMP2]], i32 0, i32 15
+// CHECK-NEXT:    store i32 [[TMP9]], ptr [[TMP10]], align 4
+// CHECK-NEXT:    ret void
+//
+void FoursSwizzleToScalar(out int4x4 A, int I) {
+    A._41_42_43_44 = I.xxxx; // row 4: the 4-lane splat of I is stored to flat elements 12, 13, 14, 15
+}
+
+// CHECK-LABEL: define hidden void @_Z20FoursSwizzleToVectorRu11matrix_typeILm4ELm4EiEDv4_i(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], <4 x i32> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i32> [[V]], ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 3
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 7
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 11
+// CHECK-NEXT:    store i32 [[TMP6]], ptr [[TMP7]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 15
+// CHECK-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// CHECK-NEXT:    ret void
+//
+void FoursSwizzleToVector(out int4x4 A, int4 V) {
+    A._14_24_34_44 = V; // column 4: lanes of V are stored to flat elements 3, 7, 11, 15 (row 4 is covered by FoursSwizzleToScalar)
+}
+//.
+// CHECK: [[META4]] = !{}
+// CHECK: [[META5]] = !{i64 4}
+//.
diff --git a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl
new file mode 100644
index 0000000000000..f99bbb2bf2b02
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl
@@ -0,0 +1,230 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return00u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return00(float4x4 A) {
+    return A._m00; // zero-based row 0, col 0: reads flat element 0
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return01u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 1>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return01(float4x4 A) {
+    return A._m01; // zero-based row 0, col 1: reads flat element 1
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return02u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 2>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return02(float4x4 A) {
+    return A._m02; // zero-based row 0, col 2: reads flat element 2
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return03u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 3>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return03(float4x4 A) {
+    return A._m03; // zero-based row 0, col 3: reads flat element 3
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return10u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 4>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return10(float4x4 A) {
+    return A._m10; // zero-based row 1, col 0: reads flat element 4
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return11u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 5>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return11(float4x4 A) {
+    return A._m11; // zero-based row 1, col 1: reads flat element 5
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return12u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 6>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return12(float4x4 A) {
+    return A._m12; // zero-based row 1, col 2: reads flat element 6
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return13u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 7>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return13(float4x4 A) {
+    return A._m13; // zero-based row 1, col 3: reads flat element 7
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return20u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 8>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return20(float4x4 A) {
+    return A._m20; // zero-based row 2, col 0: reads flat element 8
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return21u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 9>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return21(float4x4 A) {
+    return A._m21; // zero-based row 2, col 1: reads flat element 9
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return22u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 10>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return22(float4x4 A) {
+    return A._m22; // zero-based row 2, col 2: reads flat element 10
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return23u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 11>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return23(float4x4 A) {
+    return A._m23; // zero-based row 2, col 3: reads flat element 11
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return30u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 12>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return30(float4x4 A) {
+    return A._m30; // zero-based row 3, col 0: reads flat element 12
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return31u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 13>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return31(float4x4 A) {
+    return A._m31; // zero-based row 3, col 1: reads flat element 13
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return32u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 14>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return32(float4x4 A) {
+    return A._m32; // zero-based row 3, col 2: reads flat element 14
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z8Return33u11matrix_typeILm4ELm4EfE(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <1 x i32> <i32 15>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[TMP1]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float Return33(float4x4 A) {
+    return A._m33; // zero-based row 3, col 3: reads flat element 15
+}
+
diff --git a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
new file mode 100644
index 0000000000000..3c11f5fc29515
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
@@ -0,0 +1,345 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat00Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat00(out float4x4 A, float F) {
+    return A._m00 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat01Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat01(out float4x4 A, float F) {
+    return A._m01 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat02Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 2
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat02(out float4x4 A, float F) {
+    return A._m02 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat03Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 3
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat03(out float4x4 A, float F) {
+    return A._m03 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat10Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 4
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat10(out float4x4 A, float F) {
+    return A._m10 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat11Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 5
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat11(out float4x4 A, float F) {
+    return A._m11 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat12Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 6
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat12(out float4x4 A, float F) {
+    return A._m12 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat13Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 7
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat13(out float4x4 A, float F) {
+    return A._m13 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat20Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 8
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat20(out float4x4 A, float F) {
+    return A._m20 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat21Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 9
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat21(out float4x4 A, float F) {
+    return A._m21 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat22Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 10
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat22(out float4x4 A, float F) {
+    return A._m22 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat23Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 11
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat23(out float4x4 A, float F) {
+    return A._m23 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat30Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 12
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat30(out float4x4 A, float F) {
+    return A._m30 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat31Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 13
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat31(out float4x4 A, float F) {
+    return A._m31 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat32Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 14
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat32(out float4x4 A, float F) {
+    return A._m32 = F;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat33Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 15
+// CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
+// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+//
+float StoreScalarAtMat33(out float4x4 A, float F) {
+    return A._m33 = F;
+}
+
+//.
+// CHECK: [[META4]] = !{}
+// CHECK: [[META5]] = !{i64 4}
+//.
diff --git a/clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-load.hlsl b/clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-load.hlsl
new file mode 100644
index 0000000000000..6a58094f59bc2
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-load.hlsl
@@ -0,0 +1,108 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z18ReturnZerosSwizzleu11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnZerosSwizzle(half4x4 A) {
+    return A._m00_m01_m02_m03;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z19ReturnZerosSwizzle2u11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnZerosSwizzle2(half4x4 A) {
+    return A._m00_m10_m20_m30;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z17ReturnOnesSwizzleu11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnOnesSwizzle(half4x4 A) {
+    return A._m10_m11_m12_m13;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z18ReturnOnesSwizzle2u11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnOnesSwizzle2(half4x4 A) {
+    return A._m01_m11_m21_m31;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z17ReturnTwosSwizzleu11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnTwosSwizzle(half4x4 A) {
+    return A._m20_m21_m22_m23;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z18ReturnTwosSwizzle2u11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnTwosSwizzle2(half4x4 A) {
+    return A._m02_m12_m22_m32;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z19ReturnThreesSwizzleu11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnThreesSwizzle(half4x4 A) {
+    return A._m30_m31_m32_m33;
+}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z20ReturnThreesSwizzle2u11matrix_typeILm4ELm4EDhE(
+// CHECK-SAME: <16 x half> noundef nofpclass(nan inf) [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca [16 x half], align 2
+// CHECK-NEXT:    store <16 x half> [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+// CHECK-NEXT:    ret <4 x half> [[TMP1]]
+//
+half4 ReturnThreesSwizzle2(half4x4 A) {
+    return A._m03_m13_m23_m33;
+}
diff --git a/clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-store.hlsl
new file mode 100644
index 0000000000000..72fbc015d5934
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-store.hlsl
@@ -0,0 +1,230 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: define hidden void @_Z20ZerosSwizzleToScalarRu11matrix_typeILm4ELm4EdEd(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], double noundef nofpclass(nan inf) [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[D]], ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x double> [[CAST_SPLAT]], <1 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+// CHECK-NEXT:    store double [[TMP3]], ptr [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 1
+// CHECK-NEXT:    store double [[TMP4]], ptr [[TMP5]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 2
+// CHECK-NEXT:    store double [[TMP6]], ptr [[TMP7]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 3
+// CHECK-NEXT:    store double [[TMP8]], ptr [[TMP9]], align 8
+// CHECK-NEXT:    ret void
+//
+void ZerosSwizzleToScalar(out double4x4 A, double D) {
+    A._m00_m01_m02_m03 = D.xxxx;
+}
+
+// CHECK-LABEL: define hidden void @_Z20ZerosSwizzleToVectorRu11matrix_typeILm4ELm4EdEDv4_d(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], <4 x double> noundef nofpclass(nan inf) [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x double>, align 32
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x double> [[V]], ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    store double [[TMP2]], ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 4
+// CHECK-NEXT:    store double [[TMP3]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 8
+// CHECK-NEXT:    store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 12
+// CHECK-NEXT:    store double [[TMP7]], ptr [[TMP8]], align 8
+// CHECK-NEXT:    ret void
+//
+void ZerosSwizzleToVector(out double4x4 A, double4 V) {
+    A._m00_m10_m20_m30 = V;
+}
+
+// CHECK-LABEL: define hidden void @_Z19OnesSwizzleToScalarRu11matrix_typeILm4ELm4EdEd(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], double noundef nofpclass(nan inf) [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[D]], ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x double> [[CAST_SPLAT]], <1 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 4
+// CHECK-NEXT:    store double [[TMP3]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 5
+// CHECK-NEXT:    store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 6
+// CHECK-NEXT:    store double [[TMP7]], ptr [[TMP8]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 7
+// CHECK-NEXT:    store double [[TMP9]], ptr [[TMP10]], align 8
+// CHECK-NEXT:    ret void
+//
+void OnesSwizzleToScalar(out double4x4 A, double D) {
+    A._m10_m11_m12_m13 = D.xxxx;
+}
+
+// CHECK-LABEL: define hidden void @_Z19OnesSwizzleToVectorRu11matrix_typeILm4ELm4EdEDv4_d(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], <4 x double> noundef nofpclass(nan inf) [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x double>, align 32
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x double> [[V]], ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT:    store double [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 5
+// CHECK-NEXT:    store double [[TMP4]], ptr [[TMP5]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 9
+// CHECK-NEXT:    store double [[TMP6]], ptr [[TMP7]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 13
+// CHECK-NEXT:    store double [[TMP8]], ptr [[TMP9]], align 8
+// CHECK-NEXT:    ret void
+//
+void OnesSwizzleToVector(out double4x4 A, double4 V) {
+    A._m01_m11_m21_m31 = V;
+}
+
+// CHECK-LABEL: define hidden void @_Z19TwosSwizzleToScalarRu11matrix_typeILm4ELm4EdEd(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], double noundef nofpclass(nan inf) [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[D]], ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x double> [[CAST_SPLAT]], <1 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 8
+// CHECK-NEXT:    store double [[TMP3]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 9
+// CHECK-NEXT:    store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 10
+// CHECK-NEXT:    store double [[TMP7]], ptr [[TMP8]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 11
+// CHECK-NEXT:    store double [[TMP9]], ptr [[TMP10]], align 8
+// CHECK-NEXT:    ret void
+//
+void TwosSwizzleToScalar(out double4x4 A, double D) {
+    A._m20_m21_m22_m23 = D.xxxx;
+}
+
+// CHECK-LABEL: define hidden void @_Z19TwosSwizzleToVectorRu11matrix_typeILm4ELm4EdEDv4_d(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], <4 x double> noundef nofpclass(nan inf) [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x double>, align 32
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x double> [[V]], ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 2
+// CHECK-NEXT:    store double [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 6
+// CHECK-NEXT:    store double [[TMP4]], ptr [[TMP5]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 10
+// CHECK-NEXT:    store double [[TMP6]], ptr [[TMP7]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 14
+// CHECK-NEXT:    store double [[TMP8]], ptr [[TMP9]], align 8
+// CHECK-NEXT:    ret void
+//
+void TwosSwizzleToVector(out double4x4 A, double4 V) {
+    A._m02_m12_m22_m32 = V;
+}
+
+// CHECK-LABEL: define hidden void @_Z21ThreesSwizzleToScalarRu11matrix_typeILm4ELm4EdEd(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], double noundef nofpclass(nan inf) [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store double [[D]], ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8
+// CHECK-NEXT:    [[CAST_SPLAT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x double> [[CAST_SPLAT]], <1 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 12
+// CHECK-NEXT:    store double [[TMP3]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 13
+// CHECK-NEXT:    store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 14
+// CHECK-NEXT:    store double [[TMP7]], ptr [[TMP8]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr <16 x double>, ptr [[TMP2]], i32 0, i32 15
+// CHECK-NEXT:    store double [[TMP9]], ptr [[TMP10]], align 8
+// CHECK-NEXT:    ret void
+//
+void ThreesSwizzleToScalar(out double4x4 A, double D) {
+    A._m30_m31_m32_m33 = D.xxxx;
+}
+
+// CHECK-LABEL: define hidden void @_Z21ThreesSwizzleToVectorRu11matrix_typeILm4ELm4EdEDv4_d(
+// CHECK-SAME: ptr noalias noundef nonnull align 8 dereferenceable(128) [[A:%.*]], <4 x double> noundef nofpclass(nan inf) [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x double>, align 32
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x double> [[V]], ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
+// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 3
+// CHECK-NEXT:    store double [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 7
+// CHECK-NEXT:    store double [[TMP4]], ptr [[TMP5]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 11
+// CHECK-NEXT:    store double [[TMP6]], ptr [[TMP7]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr <16 x double>, ptr [[TMP1]], i32 0, i32 15
+// CHECK-NEXT:    store double [[TMP8]], ptr [[TMP9]], align 8
+// CHECK-NEXT:    ret void
+//
+void ThreesSwizzleToVector(out double4x4 A, double4 V) {
+    A._m03_m13_m23_m33 = V;
+}
+//.
+// CHECK: [[META4]] = !{}
+// CHECK: [[META5]] = !{i64 8}
+//.
diff --git a/clang/test/SemaHLSL/matrix-member-access-errors.hlsl b/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
new file mode 100644
index 0000000000000..33c540420f593
--- /dev/null
+++ b/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -finclude-default-header -verify %s
+
+void foo() {
+    float3x3 A;
+    float r = A._m00;      // read is ok
+    float good1 = A._11;    
+    float good2 = A._33;
+
+    float bad0 = A._m44;     // expected-error {{matrix row element accessor is out of bounds of zero based indexing}} expected-error {{matrix column element accessor is out of bounds of zero based indexing}}
+    float bad1 = A._m33;     // expected-error {{matrix row index 3 is out of bounds of rows size 3}} expected-error {{matrix column index 3 is out of bounds of columns size 3}}
+    float bad2 = A._mA2;     // expected-error {{invalid matrix member 'A' expected row as integer}}
+    float bad3 = A._m2F;     // expected-error {{invalid matrix member 'F' expected column as integer}}
+
+    float bad4 = A._00;      // expected-error {{matrix row element accessor is out of bounds of one based indexing}} expected-error {{matrix column element accessor is out of bounds of one based indexing}}
+    float bad5 = A._44;      // expected-error {{matrix row index 3 is out of bounds of rows size 3}} expected-error {{matrix column index 3 is out of bounds of columns size 3}}
+    float bad6 = A._55;      // expected-error {{matrix row element accessor is out of bounds of one based indexing}} expected-error {{matrix column element accessor is out of bounds of one based indexing}}
+    float bad7 = A.foo;      // expected-error {{invalid matrix member 'foo' expected zero based: '_mRC' or one-based: '_RC' accessor}}
+    float2 bad8 = A._m00_33; // expected-error {{invalid matrix member '3' expected '_m' prefix}}
+    float2 bad9 = A._11_m33; // expected-error {{invalid matrix member 'm' expected row as integer}}
+    float bad10 = A._m0000;  // expected-error {{invalid matrix member '0' expected '_m' prefix}}
+    float bad11 = A._m1;     // expected-error {{invalid matrix member '_m1' expected zero based: '_mRC' accessor}}
+    float bad12 = A._m;      // expected-error {{invalid matrix member '_m' expected zero based: '_mRC' accessor}}
+    float bad13 = A._1;      // expected-error {{invalid matrix member '_1' expected one-based: '_RC' accessor}}
+    float bad14 = A.m;       // expected-error {{invalid matrix member 'm' expected length 4 for zero based: '_mRC' or length 3 for one-based: '_RC' accessor}}
+
+    A._m12 = 3.14;           // write is OK
+    A._m00_m00 = 1.xx;       // expected-error {{matrix is not assignable (contains duplicate components)}}
+}
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 08ea73dcded08..20ddbd7d6e797 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -322,6 +322,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::ExprWithCleanupsClass:
   case Stmt::ExpressionTraitExprClass:
   case Stmt::ExtVectorElementExprClass:
+  case Stmt::MatrixElementExprClass:
   case Stmt::ImplicitCastExprClass:
   case Stmt::ImplicitValueInitExprClass:
   case Stmt::NoInitExprClass:

>From d22dd2ce06bd9639df892322df62eccc14c255b8 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Mon, 8 Dec 2025 17:37:38 -0500
Subject: [PATCH 2/4] - Remove all the Expr Constant AST visitors. - Remove CIR
 emitter stubs - Run clang-format

---
 clang/include/clang/AST/Expr.h                  | 17 ++++++-----------
 clang/lib/AST/ByteCode/Compiler.cpp             |  6 ------
 clang/lib/AST/ByteCode/Compiler.h               |  1 -
 clang/lib/AST/ExprConstant.cpp                  |  7 -------
 clang/lib/CIR/CodeGen/CIRGenFunction.cpp        |  2 --
 clang/lib/CIR/CodeGen/CIRGenFunction.h          |  2 --
 .../HLSL/pch_with_matrix_element_accessor.hlsl  |  2 +-
 7 files changed, 7 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 676f59c7c104b..cf0d9e8cd291a 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -309,7 +309,7 @@ class Expr : public ValueStmt {
     MLV_DuplicateVectorComponents,
     MLV_DuplicateMatrixComponents,
     MLV_InvalidExpression,
-    MLV_LValueCast,           // Specialized form of MLV_InvalidExpression.
+    MLV_LValueCast, // Specialized form of MLV_InvalidExpression.
     MLV_IncompleteType,
     MLV_ConstQualified,
     MLV_ConstQualifiedField,
@@ -342,17 +342,17 @@ class Expr : public ValueStmt {
     enum Kinds {
       CL_LValue,
       CL_XValue,
-      CL_Function, // Functions cannot be lvalues in C.
-      CL_Void, // Void cannot be an lvalue in C.
+      CL_Function,        // Functions cannot be lvalues in C.
+      CL_Void,            // Void cannot be an lvalue in C.
       CL_AddressableVoid, // Void expression whose address can be taken in C.
       CL_DuplicateVectorComponents, // A vector shuffle with dupes.
       CL_DuplicateMatrixComponents, // A matrix shuffle with dupes.
       CL_MemberFunction, // An expression referring to a member function
       CL_SubObjCPropertySetting,
-      CL_ClassTemporary, // A temporary of class type, or subobject thereof.
-      CL_ArrayTemporary, // A temporary of array type.
+      CL_ClassTemporary,    // A temporary of class type, or subobject thereof.
+      CL_ArrayTemporary,    // A temporary of array type.
       CL_ObjCMessageRValue, // ObjC message is an rvalue
-      CL_PRValue // A prvalue for any other reason, of any other type
+      CL_PRValue            // A prvalue for any other reason, of any other type
     };
     /// The results of modification testing.
     enum ModifiableType {
@@ -6590,11 +6590,6 @@ template <class Derived> class ElementAccessExprBase : public Expr {
   }
   SourceLocation getEndLoc() const LLVM_READONLY { return AccessorLoc; }
 
-  /*static bool classof(const Stmt *T) {
-    return T->getStmtClass() == ExtVectorElementExprClass ||
-           T->getStmtClass() == MatrixElementExprClass;
-  }*/
-
   child_range children() { return child_range(&Base, &Base + 1); }
   const_child_range children() const {
     return const_child_range(&Base, &Base + 1);
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 8ca53af9ad108..21f8db06919ed 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4116,12 +4116,6 @@ bool Compiler<Emitter>::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
   return true;
 }
 
-template <class Emitter>
-bool Compiler<Emitter>::VisitMatrixElementExpr(const MatrixElementExpr *Node) {
-  // TODO
-  return false;
-}
-
 template <class Emitter>
 bool Compiler<Emitter>::VisitExtVectorElementExpr(
     const ExtVectorElementExpr *E) {
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 8f9a2b16f8596..1bd15c3d79563 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -223,7 +223,6 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
   bool VisitConvertVectorExpr(const ConvertVectorExpr *E);
   bool VisitShuffleVectorExpr(const ShuffleVectorExpr *E);
   bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E);
-  bool VisitMatrixElementExpr(const MatrixElementExpr *E);
   bool VisitObjCBoxedExpr(const ObjCBoxedExpr *E);
   bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);
   bool VisitStmtExpr(const StmtExpr *E);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ed8d795728343..fbf738e76fac6 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -8995,8 +8995,6 @@ class ExprEvaluatorBase
            DerivedSuccess(Result, E);
   }
 
-  bool VisitMatrixElementExpr(const MatrixElementExpr *E) { return false; }
-
   bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E) {
     APValue Val;
     if (!Evaluate(Val, Info, E->getBase()))
@@ -9339,7 +9337,6 @@ class LValueExprEvaluator
   bool VisitCXXUuidofExpr(const CXXUuidofExpr *E);
   bool VisitArraySubscriptExpr(const ArraySubscriptExpr *E);
   bool VisitExtVectorElementExpr(const ExtVectorElementExpr *E);
-  bool VisitMatrixElementExpr(const MatrixElementExpr *E);
   bool VisitUnaryDeref(const UnaryOperator *E);
   bool VisitUnaryReal(const UnaryOperator *E);
   bool VisitUnaryImag(const UnaryOperator *E);
@@ -9701,10 +9698,6 @@ bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) {
   return LValueExprEvaluatorBaseTy::VisitMemberExpr(E);
 }
 
-bool LValueExprEvaluator::VisitMatrixElementExpr(const MatrixElementExpr *E) {
-  return false;
-}
-
 bool LValueExprEvaluator::VisitExtVectorElementExpr(
     const ExtVectorElementExpr *E) {
   bool Success = true;
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 70e85ad02998f..ac66d00950f05 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -1001,8 +1001,6 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
     return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
   case Expr::ExtVectorElementExprClass:
     return emitExtVectorElementExpr(cast<ExtVectorElementExpr>(e));
-  case Expr::MatrixElementExprClass:
-    return emitMatrixElementExpr(cast<MatrixElementExpr>(e));
   case Expr::UnaryOperatorClass:
     return emitUnaryOpLValue(cast<UnaryOperator>(e));
   case Expr::StringLiteralClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 3a6663acbbf47..adcf4d56e3892 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1357,8 +1357,6 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   LValue emitExtVectorElementExpr(const ExtVectorElementExpr *e);
 
-  LValue emitMatrixElementExpr(cost MatrixElementExpr *e);
-
   Address emitArrayToPointerDecay(const Expr *e,
                                   LValueBaseInfo *baseInfo = nullptr);
 
diff --git a/clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl b/clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl
index 605a119e3beac..46f340a4196d8 100644
--- a/clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl
+++ b/clang/test/AST/HLSL/pch_with_matrix_element_accessor.hlsl
@@ -23,4 +23,4 @@ float4 getDiag() {
 // CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector<float, 4>' lvalue ParmVar {{.*}} 'V' 'float4':'vector<float, 4>'
 void setRowZero(float4 V) {  
   gM._m00_m01_m02_m03 = V;
-}
\ No newline at end of file
+}

>From c243dda06f5b9f87eee930f9f4cc106e003cd6a3 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 20 Jan 2026 17:06:34 -0500
Subject: [PATCH 3/4] address easy parts of pr

---
 clang/include/clang/Sema/SemaHLSL.h           |  2 +-
 clang/lib/AST/Expr.cpp                        |  4 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  9 ++---
 clang/lib/Sema/SemaExprMember.cpp             |  2 +-
 clang/lib/Sema/SemaHLSL.cpp                   | 39 ++++++++-----------
 ...atrix-member-access-swizzle-ast-print.hlsl |  8 ++--
 .../SemaHLSL/matrix-member-access-errors.hlsl | 34 +++++++++-------
 7 files changed, 47 insertions(+), 51 deletions(-)

diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h
index e7e5f4bba4088..020a4dc44ee7f 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -215,7 +215,7 @@ class SemaHLSL : public SemaBase {
   bool transformInitList(const InitializedEntity &Entity, InitListExpr *Init);
   bool handleInitialization(VarDecl *VDecl, Expr *&Init);
   void deduceAddressSpace(VarDecl *Decl);
-  QualType CheckMatrixComponent(Sema &S, QualType baseType, ExprValueKind &VK,
+  QualType checkMatrixComponent(Sema &S, QualType baseType, ExprValueKind &VK,
                                 SourceLocation OpLoc,
                                 const IdentifierInfo *CompName,
                                 SourceLocation CompLoc);
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index e8e2ff96cd84a..a20964e3afa26 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4479,15 +4479,13 @@ bool MatrixElementExpr::containsDuplicateElements() const {
     IsZeroIndexed = false;
     ChunkLen = 3;
   }
-
-  assert(ChunkLen && "unrecognized matrix swizzle format");
   assert(Comp.size() % ChunkLen == 0 &&
          "matrix swizzle accessor has invalid length");
 
   // Track visited elements using real matrix size.
   SmallVector<bool, 16> Seen(Max, false);
 
-  for (unsigned I = 0, e = Comp.size(); I < e; I += ChunkLen) {
+  for (unsigned I = 0, E = Comp.size(); I < E; I += ChunkLen) {
     unsigned Row = 0, Col = 0;
 
     if (IsZeroIndexed) {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a826d8cdf934c..c6117089a3fd9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2299,7 +2299,6 @@ static RawAddress MaybeConvertMatrixAddress(RawAddress Addr,
 }
 
 LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) {
-  // return EmitUnsupportedLValue(E, "Matrix swizzle");
   LValue Base;
   if (E->getBase()->isGLValue())
     Base = EmitLValue(E->getBase());
@@ -2315,7 +2314,7 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) {
     Builder.CreateStore(Mat, MatMem);
     Base = MakeAddrLValue(MatMem, Ty, AlignmentSource::Decl);
   }
-  QualType type =
+  QualType ResultType =
       E->getType().withCVRQualifiers(Base.getQuals().getCVRQualifiers());
 
   // Encode the element access list into a vector of unsigned indices.
@@ -2326,7 +2325,7 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) {
     llvm::Constant *CV =
         llvm::ConstantDataVector::get(getLLVMContext(), Indices);
     return LValue::MakeExtVectorElt(
-        MaybeConvertMatrixAddress(Base.getAddress(), *this), CV, type,
+        MaybeConvertMatrixAddress(Base.getAddress(), *this), CV, ResultType,
         Base.getBaseInfo(), TBAAAccessInfo());
   }
   assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!");
@@ -2338,8 +2337,8 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) {
     CElts.push_back(BaseElts->getAggregateElement(Index));
   llvm::Constant *CV = llvm::ConstantVector::get(CElts);
   return LValue::MakeExtVectorElt(
-      MaybeConvertMatrixAddress(Base.getExtVectorAddress(), *this), CV, type,
-      Base.getBaseInfo(), TBAAAccessInfo());
+      MaybeConvertMatrixAddress(Base.getExtVectorAddress(), *this), CV,
+      ResultType, Base.getBaseInfo(), TBAAAccessInfo());
 }
 
 // Emit a store of a matrix LValue. This may require casting the original
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index 80d0f4945db67..e2f26ef5aa2b2 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -1622,7 +1622,7 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R,
   if (S.getLangOpts().HLSL && BaseType->isConstantMatrixType()) {
     IdentifierInfo *Member = MemberName.getAsIdentifierInfo();
     ExprValueKind VK = BaseExpr.get()->getValueKind();
-    QualType Ret = S.HLSL().CheckMatrixComponent(S, BaseType, VK, OpLoc, Member,
+    QualType Ret = S.HLSL().checkMatrixComponent(S, BaseType, VK, OpLoc, Member,
                                                  MemberLoc);
     if (Ret.isNull())
       return ExprError();
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 347df16cbd2a0..9bb1d59bdb5e7 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4796,14 +4796,14 @@ bool SemaHLSL::transformInitList(const InitializedEntity &Entity,
   return true;
 }
 
-QualType SemaHLSL::CheckMatrixComponent(Sema &S, QualType baseType,
+QualType SemaHLSL::checkMatrixComponent(Sema &S, QualType baseType,
                                         ExprValueKind &VK, SourceLocation OpLoc,
                                         const IdentifierInfo *CompName,
                                         SourceLocation CompLoc) {
   const auto *MT = baseType->getAs<ConstantMatrixType>();
   StringRef AccessorName = CompName->getName();
   assert(MT &&
-         "CheckMatrixComponent is intended to be used on ConstantMatrixType");
+         "checkMatrixComponent is intended to be used on ConstantMatrixType");
   assert(!AccessorName.empty() && "Matrix Accessor must have a name");
 
   unsigned Rows = MT->getNumRows();
@@ -4818,13 +4818,14 @@ QualType SemaHLSL::CheckMatrixComponent(Sema &S, QualType baseType,
         << SourceRange(CompLoc);
     return QualType();
   }
-  if (AccessorName[0] == '_' && AccessorName[1] == 'm') {
-    IsZeroBasedAccessor = true; // zero-based: 00..33
-    ChunkLen = 4;               // zero-based: "_mRC"
-  } else if (AccessorName[0] == '_')
-    // one-based: 11..44
-    ChunkLen = 3; // one-based: "_RC"
-  else {
+  if (AccessorName[0] == '_') {
+    if (AccessorName[1] == 'm') {
+      IsZeroBasedAccessor = true;
+      ChunkLen = 4; // zero-based: "_mRC"
+    } else {
+      ChunkLen = 3; // one-based: "_RC"
+    }
+  } else {
     const char Expected[] =
         "zero based: \'_mRC\' or one-based: \'_RC\' accessor";
     S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
@@ -4833,25 +4834,19 @@ QualType SemaHLSL::CheckMatrixComponent(Sema &S, QualType baseType,
     return QualType();
   }
 
-  if (IsZeroBasedAccessor && AccessorName.size() < 4) {
-    const char Expected[] = "zero based: \'_mRC\' accessor";
-    S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
-        << CompName->getName() << StringRef(Expected, sizeof(Expected) - 1)
-        << SourceRange(CompLoc);
-    return QualType();
-  }
+  if (AccessorName.size() % ChunkLen != 0) {
+    const llvm::StringRef Expected = IsZeroBasedAccessor
+                                         ? "zero based: '_mRC' accessor"
+                                         : "one-based: '_RC' accessor";
 
-  if (AccessorName.size() < 3) {
-    const char Expected[] = "one-based: \'_RC\' accessor";
     S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
-        << CompName->getName() << StringRef(Expected, sizeof(Expected) - 1)
-        << SourceRange(CompLoc);
+        << CompName->getName() << Expected << SourceRange(CompLoc);
     return QualType();
   }
 
   auto isDigit = [](char c) { return c >= '0' && c <= '9'; };
-  auto isZeroBasedIndex = [](int i) { return i >= 0 && i <= 3; };
-  auto isOneBasedIndex = [](int i) { return i >= 1 && i <= 4; };
+  auto isZeroBasedIndex = [](unsigned i) { return i <= 3; };
+  auto isOneBasedIndex = [](unsigned i) { return i >= 1 && i <= 4; };
 
   bool HasRepeated = false;
   SmallVector<bool, 16> Seen(Rows * Cols, false);
diff --git a/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl b/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl
index fa265987bd000..9d7c7ea5d9133 100644
--- a/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl
+++ b/clang/test/AST/HLSL/matrix-member-access-swizzle-ast-print.hlsl
@@ -11,11 +11,11 @@ float4x4 gMat;
 [numthreads(1, 1, 1)]
 void main() {
   float4x4 A = gMat;
-  float3 v1 = A._11_22_33;
-  float2 v2 = A._m00_m11;
+  float3 v1 = A._12_21_32;
+  float2 v2 = A._m01_m10;
 }
 
 // CHECK: float4x4 gMat;
 // CHECK: float4x4 A = gMat;
-// CHECK: float3 v1 = A._11_22_33;
-// CHECK: float2 v2 = A._m00_m11;
+// CHECK: float3 v1 = A._12_21_32;
+// CHECK: float2 v2 = A._m01_m10;
diff --git a/clang/test/SemaHLSL/matrix-member-access-errors.hlsl b/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
index 33c540420f593..1d1fc479b09d7 100644
--- a/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
+++ b/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
@@ -6,22 +6,26 @@ void foo() {
     float good1 = A._11;    
     float good2 = A._33;
 
-    float bad0 = A._m44;     // expected-error {{matrix row element accessor is out of bounds of zero based indexing}} expected-error {{matrix column element accessor is out of bounds of zero based indexing}}
-    float bad1 = A._m33;     // expected-error {{matrix row index 3 is out of bounds of rows size 3}} expected-error {{matrix column index 3 is out of bounds of columns size 3}}
-    float bad2 = A._mA2;     // expected-error {{invalid matrix member 'A' expected row as integer}}
-    float bad3 = A._m2F;     // expected-error {{invalid matrix member 'F' expected column as integer}}
+    float bad0 = A._m44;      // expected-error {{matrix row element accessor is out of bounds of zero based indexing}} expected-error {{matrix column element accessor is out of bounds of zero based indexing}}
+    float bad1 = A._m33;      // expected-error {{matrix row index 3 is out of bounds of rows size 3}} expected-error {{matrix column index 3 is out of bounds of columns size 3}}
+    float bad2 = A._mA2;      // expected-error {{invalid matrix member 'A' expected row as integer}}
+    float bad3 = A._m2F;      // expected-error {{invalid matrix member 'F' expected column as integer}}
 
-    float bad4 = A._00;      // expected-error {{matrix row element accessor is out of bounds of one based indexing}} expected-error {{matrix column element accessor is out of bounds of one based indexing}}
-    float bad5 = A._44;      // expected-error {{matrix row index 3 is out of bounds of rows size 3}} expected-error {{matrix column index 3 is out of bounds of columns size 3}}
-    float bad6 = A._55;      // expected-error {{matrix row element accessor is out of bounds of one based indexing}} expected-error {{matrix column element accessor is out of bounds of one based indexing}}
-    float bad7 = A.foo;      // expected-error {{invalid matrix member 'foo' expected zero based: '_mRC' or one-based: '_RC' accessor}}
-    float2 bad8 = A._m00_33; // expected-error {{invalid matrix member '3' expected '_m' prefix}}
-    float2 bad9 = A._11_m33; // expected-error {{invalid matrix member 'm' expected row as integer}}
-    float bad10 = A._m0000;  // expected-error {{invalid matrix member '0' expected '_m' prefix}}
-    float bad11 = A._m1;     // expected-error {{invalid matrix member '_m1' expected zero based: '_mRC' accessor}}
-    float bad12 = A._m;      // expected-error {{invalid matrix member '_m' expected zero based: '_mRC' accessor}}
-    float bad13 = A._1;      // expected-error {{invalid matrix member '_1' expected one-based: '_RC' accessor}}
-    float bad14 = A.m;       // expected-error {{invalid matrix member 'm' expected length 4 for zero based: '_mRC' or length 3 for one-based: '_RC' accessor}}
+    float bad4 = A._00;       // expected-error {{matrix row element accessor is out of bounds of one based indexing}} expected-error {{matrix column element accessor is out of bounds of one based indexing}}
+    float bad5 = A._44;       // expected-error {{matrix row index 3 is out of bounds of rows size 3}} expected-error {{matrix column index 3 is out of bounds of columns size 3}}
+    float bad6 = A._55;       // expected-error {{matrix row element accessor is out of bounds of one based indexing}} expected-error {{matrix column element accessor is out of bounds of one based indexing}}
+    float bad7 = A.foo;       // expected-error {{invalid matrix member 'foo' expected zero based: '_mRC' or one-based: '_RC' accessor}}
+    float2 bad8 = A._m00_33;  // expected-error {{invalid matrix member '_m00_33' expected zero based: '_mRC' accessor}}
+    float2 bad9 = A._11_m33;  // expected-error {{invalid matrix member '_11_m33' expected one-based: '_RC' accessor}}
+    float bad10 = A._m0000;   // expected-error {{invalid matrix member '_m0000' expected zero based: '_mRC' accessor}}
+    float bad11 = A._m1;      // expected-error {{invalid matrix member '_m1' expected zero based: '_mRC' accessor}}
+    float bad12 = A._m;       // expected-error {{invalid matrix member '_m' expected zero based: '_mRC' accessor}}
+    float bad13 = A._1;       // expected-error {{invalid matrix member '_1' expected one-based: '_RC' accessor}}
+    float bad14 = A.m;        // expected-error {{invalid matrix member 'm' expected length 4 for zero based: '_mRC' or length 3 for one-based: '_RC' accessor}}
+    float bad15 = A._;        // expected-error {{invalid matrix member '_' expected length 4 for zero based: '_mRC' or length 3 for one-based: '_RC' accessor}}
+    float bad16 = A._m00_m;   // expected-error {{invalid matrix member '_m00_m' expected zero based: '_mRC' accessor}}
+    float bad17 = A._m11_m2;  // expected-error {{invalid matrix member '_m11_m2' expected zero based: '_mRC' accessor}}
+    float bad18 = A._m11_mAF; // expected-error {{invalid matrix member 'A' expected row as integer}} // expected-error {{invalid matrix member 'F' expected column as integer}}
 
     A._m12 = 3.14;           // write is OK
     A._m00_m00 = 1.xx;       // expected-error {{matrix is not assignable (contains duplicate components)}}

>From 593abd1684f86be63f10662f583b0e1287ad068e Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 22 Jan 2026 11:30:48 -0500
Subject: [PATCH 4/4] address more of the easy stuff

---
 clang/include/clang/AST/Expr.h                |  16 ++
 clang/lib/AST/Expr.cpp                        |  28 ++--
 clang/lib/Sema/SemaHLSL.cpp                   |  93 +++++------
 ...ember-one-based-accessor-scalar-store.hlsl | 144 ++++++++----------
 ...mber-zero-based-accessor-scalar-store.hlsl | 144 ++++++++----------
 .../SemaHLSL/matrix-member-access-errors.hlsl |   7 +
 6 files changed, 203 insertions(+), 229 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index cf0d9e8cd291a..697dc73c46c2d 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -6612,11 +6612,19 @@ class ExtVectorElementExpr
             ExtVectorElementExprClass, Ty, VK, Base, Accessor, Loc,
             (VK == VK_PRValue ? OK_Ordinary : OK_VectorComponent)) {}
 
+  /// Build an empty vector element expression.
   explicit ExtVectorElementExpr(EmptyShell Empty)
       : ElementAccessExprBase(ExtVectorElementExprClass, Empty) {}
 
+  /// getNumElements - Get the number of components being selected.
   unsigned getNumElements() const;
+
+  /// containsDuplicateElements - Return true if any element access is
+  /// repeated.
   bool containsDuplicateElements() const;
+
+  /// getEncodedElementAccess - Encode the elements accessed into an llvm
+  /// aggregate Constant of ConstantInt(s).
   void getEncodedElementAccess(SmallVectorImpl<uint32_t> &Elts) const;
 
   /// isArrow - Return true if the base expression is a pointer to vector,
@@ -6636,11 +6644,19 @@ class MatrixElementExpr : public ElementAccessExprBase<MatrixElementExpr> {
             MatrixElementExprClass, Ty, VK, Base, Accessor, Loc,
             OK_Ordinary /*TODO: Should we add a new OK_MatrixComponent?*/) {}
 
+  /// Build an empty matrix element expression.
   explicit MatrixElementExpr(EmptyShell Empty)
       : ElementAccessExprBase(MatrixElementExprClass, Empty) {}
 
+  /// getNumElements - Get the number of components being selected.
   unsigned getNumElements() const;
+
+  /// containsDuplicateElements - Return true if any element access is
+  /// repeated.
   bool containsDuplicateElements() const;
+
+  /// getEncodedElementAccess - Encode the elements accessed into an llvm
+  /// aggregate Constant of ConstantInt(s).
   void getEncodedElementAccess(SmallVectorImpl<uint32_t> &Elts) const;
 
   static bool classof(const Stmt *T) {
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index a20964e3afa26..b21acb416e9b9 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4425,7 +4425,7 @@ unsigned ExtVectorElementExpr::getNumElements() const {
 }
 
 unsigned MatrixElementExpr::getNumElements() const {
-  if (const ConstantMatrixType *MT = getType()->getAs<ConstantMatrixType>())
+  if (const auto *MT = getType()->getAs<ConstantMatrixType>())
     return MT->getNumElementsFlattened();
   return 1;
 }
@@ -4459,9 +4459,7 @@ bool MatrixElementExpr::containsDuplicateElements() const {
   assert(!Comp.empty() && Comp[0] == '_' && "invalid matrix accessor");
 
   // Get the matrix type so we know bounds.
-  const ConstantMatrixType *MT =
-      getBase()->getType()->getAs<ConstantMatrixType>();
-  assert(MT && "MatrixElementExpr base must be a matrix type");
+  const auto *MT = getBase()->getType()->castAs<ConstantMatrixType>();
 
   unsigned Rows = MT->getNumRows();
   unsigned Cols = MT->getNumColumns();
@@ -4483,7 +4481,7 @@ bool MatrixElementExpr::containsDuplicateElements() const {
          "matrix swizzle accessor has invalid length");
 
   // Track visited elements using real matrix size.
-  SmallVector<bool, 16> Seen(Max, false);
+  llvm::BitVector Seen(Max, /*t=*/false);
 
   for (unsigned I = 0, E = Comp.size(); I < E; I += ChunkLen) {
     unsigned Row = 0, Col = 0;
@@ -4507,7 +4505,7 @@ bool MatrixElementExpr::containsDuplicateElements() const {
     if (Seen[Index])
       return true;
 
-    Seen[Index] = true;
+    Seen.set(Index);
   }
   return false;
 }
@@ -4550,9 +4548,7 @@ void MatrixElementExpr::getEncodedElementAccess(
   StringRef Comp = Accessor->getName();
   assert(!Comp.empty() && Comp[0] == '_' && "invalid matrix accessor");
 
-  const ConstantMatrixType *MT =
-      getBase()->getType()->getAs<ConstantMatrixType>();
-  assert(MT && "MatrixElementExpr base must be a matrix type");
+  const auto *MT = getBase()->getType()->castAs<ConstantMatrixType>();
 
   unsigned Rows = MT->getNumRows();
   unsigned Cols = MT->getNumColumns();
@@ -4574,20 +4570,20 @@ void MatrixElementExpr::getEncodedElementAccess(
   assert(Comp.size() % ChunkLen == 0 &&
          "matrix swizzle accessor has invalid length");
 
-  for (unsigned i = 0, e = Comp.size(); i < e; i += ChunkLen) {
+  for (unsigned I = 0, E = Comp.size(); I < E; I += ChunkLen) {
     unsigned Row = 0, Col = 0;
 
     if (IsZeroIndexed) {
       // Pattern: _mRC
-      assert(Comp[i] == '_' && Comp[i + 1] == 'm' &&
+      assert(Comp[I] == '_' && Comp[I + 1] == 'm' &&
              "invalid zero-indexed matrix swizzle component");
-      Row = static_cast<unsigned>(Comp[i + 2] - '0'); // 0..Rows-1
-      Col = static_cast<unsigned>(Comp[i + 3] - '0'); // 0..Cols-1
+      Row = static_cast<unsigned>(Comp[I + 2] - '0'); // 0..Rows-1
+      Col = static_cast<unsigned>(Comp[I + 3] - '0'); // 0..Cols-1
     } else {
       // Pattern: _RC
-      assert(Comp[i] == '_' && "invalid one-indexed matrix swizzle component");
-      Row = static_cast<unsigned>(Comp[i + 1] - '1'); // 1..Rows -> 0..Rows-1
-      Col = static_cast<unsigned>(Comp[i + 2] - '1'); // 1..Cols -> 0..Cols-1
+      assert(Comp[I] == '_' && "invalid one-indexed matrix swizzle component");
+      Row = static_cast<unsigned>(Comp[I + 1] - '1'); // 1..Rows -> 0..Rows-1
+      Col = static_cast<unsigned>(Comp[I + 2] - '1'); // 1..Cols -> 0..Cols-1
     }
 
     // Sema should have validated these, but assert here for sanity.
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 9bb1d59bdb5e7..bb1251e830724 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4796,28 +4796,33 @@ bool SemaHLSL::transformInitList(const InitializedEntity &Entity,
   return true;
 }
 
+static QualType ReportMatrixInvalidMember(Sema &S, StringRef Name,
+                                          StringRef Expected,
+                                          SourceLocation OpLoc,
+                                          SourceLocation CompLoc) {
+  S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
+      << Name << Expected << SourceRange(CompLoc);
+  return QualType();
+}
+
 QualType SemaHLSL::checkMatrixComponent(Sema &S, QualType baseType,
                                         ExprValueKind &VK, SourceLocation OpLoc,
                                         const IdentifierInfo *CompName,
                                         SourceLocation CompLoc) {
-  const auto *MT = baseType->getAs<ConstantMatrixType>();
+  const auto *MT = baseType->castAs<ConstantMatrixType>();
   StringRef AccessorName = CompName->getName();
-  assert(MT &&
-         "checkMatrixComponent is intended to be used on ConstantMatrixType");
   assert(!AccessorName.empty() && "Matrix Accessor must have a name");
 
   unsigned Rows = MT->getNumRows();
   unsigned Cols = MT->getNumColumns();
   bool IsZeroBasedAccessor = false;
   unsigned ChunkLen = 0;
-  if (AccessorName.size() < 2) {
-    const char Expected[] = "length 4 for zero based: \'_mRC\' or length 3 for "
-                            "one-based: \'_RC\' accessor";
-    S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
-        << CompName->getName() << StringRef(Expected, sizeof(Expected) - 1)
-        << SourceRange(CompLoc);
-    return QualType();
-  }
+  if (AccessorName.size() < 2)
+    return ReportMatrixInvalidMember(S, AccessorName,
+                                     "length 4 for zero based: \'_mRC\' or "
+                                     "length 3 for one-based: \'_RC\' accessor",
+                                     OpLoc, CompLoc);
+
   if (AccessorName[0] == '_') {
     if (AccessorName[1] == 'm') {
       IsZeroBasedAccessor = true;
@@ -4825,23 +4830,17 @@ QualType SemaHLSL::checkMatrixComponent(Sema &S, QualType baseType,
     } else {
       ChunkLen = 3; // one-based: "_RC"
     }
-  } else {
-    const char Expected[] =
-        "zero based: \'_mRC\' or one-based: \'_RC\' accessor";
-    S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
-        << CompName->getName() << StringRef(Expected, sizeof(Expected) - 1)
-        << SourceRange(CompLoc);
-    return QualType();
-  }
+  } else
+    return ReportMatrixInvalidMember(
+        S, AccessorName, "zero based: \'_mRC\' or one-based: \'_RC\' accessor",
+        OpLoc, CompLoc);
 
   if (AccessorName.size() % ChunkLen != 0) {
     const llvm::StringRef Expected = IsZeroBasedAccessor
                                          ? "zero based: '_mRC' accessor"
                                          : "one-based: '_RC' accessor";
 
-    S.Diag(OpLoc, diag::err_builtin_matrix_invalid_member)
-        << CompName->getName() << Expected << SourceRange(CompLoc);
-    return QualType();
+    return ReportMatrixInvalidMember(S, AccessorName, Expected, OpLoc, CompLoc);
   }
 
   auto isDigit = [](char c) { return c >= '0' && c <= '9'; };
@@ -4860,52 +4859,40 @@ QualType SemaHLSL::checkMatrixComponent(Sema &S, QualType baseType,
       // Zero-based: "_mRC"
       if (Chunk[0] != '_' || Chunk[1] != 'm') {
         char Bad = (Chunk[0] != '_') ? Chunk[0] : Chunk[1];
-        const char Expected[] = "\'_m\' prefix";
-        S.Diag(OpLoc.getLocWithOffset(I + (Bad == Chunk[0] ? 1 : 2)),
-               diag::err_builtin_matrix_invalid_member)
-            << StringRef(&Bad, 1) << StringRef(Expected, sizeof(Expected) - 1)
-            << SourceRange(CompLoc);
-        return QualType();
+        return ReportMatrixInvalidMember(
+            S, StringRef(&Bad, 1), "\'_m\' prefix",
+            OpLoc.getLocWithOffset(I + (Bad == Chunk[0] ? 1 : 2)), CompLoc);
       }
       RowChar = Chunk[2];
       ColChar = Chunk[3];
     } else {
       // One-based: "_RC"
-      if (Chunk[0] != '_') {
-        const char Expected[] = "\'_\' prefix";
-        S.Diag(OpLoc.getLocWithOffset(I + 1),
-               diag::err_builtin_matrix_invalid_member)
-            << StringRef(&Chunk[0], 1)
-            << StringRef(Expected, sizeof(Expected) - 1)
-            << SourceRange(CompLoc);
-        return QualType();
-      }
+      if (Chunk[0] != '_')
+        return ReportMatrixInvalidMember(
+            S, StringRef(&Chunk[0], 1), "\'_\' prefix",
+            OpLoc.getLocWithOffset(I + 1), CompLoc);
       RowChar = Chunk[1];
       ColChar = Chunk[2];
     }
 
     // Must be digits.
-    bool isDigitsError = false;
+    bool IsDigitsError = false;
     if (!isDigit(RowChar)) {
-      const char Expected[] = "row as integer";
       unsigned BadPos = IsZeroBasedAccessor ? 2 : 1;
-      S.Diag(OpLoc.getLocWithOffset(I + BadPos + 1),
-             diag::err_builtin_matrix_invalid_member)
-          << StringRef(&RowChar, 1) << StringRef(Expected, sizeof(Expected) - 1)
-          << SourceRange(CompLoc);
-      isDigitsError = true;
+      ReportMatrixInvalidMember(S, StringRef(&RowChar, 1), "row as integer",
+                                OpLoc.getLocWithOffset(I + BadPos + 1),
+                                CompLoc);
+      IsDigitsError = true;
     }
 
     if (!isDigit(ColChar)) {
-      const char Expected[] = "column as integer";
       unsigned BadPos = IsZeroBasedAccessor ? 3 : 2;
-      S.Diag(OpLoc.getLocWithOffset(I + BadPos + 1),
-             diag::err_builtin_matrix_invalid_member)
-          << StringRef(&ColChar, 1) << StringRef(Expected, sizeof(Expected) - 1)
-          << SourceRange(CompLoc);
-      isDigitsError = true;
+      ReportMatrixInvalidMember(S, StringRef(&ColChar, 1), "column as integer",
+                                OpLoc.getLocWithOffset(I + BadPos + 1),
+                                CompLoc);
+      IsDigitsError = true;
     }
-    if (isDigitsError)
+    if (IsDigitsError)
       return QualType();
 
     unsigned Row = RowChar - '0';
@@ -4937,8 +4924,8 @@ QualType SemaHLSL::checkMatrixComponent(Sema &S, QualType baseType,
         HasIndexingError = true;
       }
       // Convert to 0-based after range checking.
-      Row--;
-      Col--;
+      --Row;
+      --Col;
     }
 
     if (HasIndexingError)
diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
index 5f80f924b024f..3098a09f67100 100644
--- a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
@@ -4,7 +4,7 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat11Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat11Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -17,14 +17,13 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP1]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat11(out int4x4 A, int I) {
-    return A._11 = I;
+void StoreScalarAtMat11(out int4x4 A, int I) {
+    A._11 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat12Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat12Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -38,14 +37,13 @@ int StoreScalarAtMat11(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 1
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat12(out int4x4 A, int I) {
-    return A._12 = I;
+void StoreScalarAtMat12(out int4x4 A, int I) {
+    A._12 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat13Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat13Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -59,14 +57,13 @@ int StoreScalarAtMat12(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat13(out int4x4 A, int I) {
-    return A._13 = I;
+void StoreScalarAtMat13(out int4x4 A, int I) {
+    A._13 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat14Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat14Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -80,14 +77,13 @@ int StoreScalarAtMat13(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 3
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat14(out int4x4 A, int I) {
-    return A._14 = I;
+void StoreScalarAtMat14(out int4x4 A, int I) {
+    A._14 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat21Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat21Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -101,14 +97,13 @@ int StoreScalarAtMat14(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 4
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat21(out int4x4 A, int I) {
-    return A._21 = I;
+void StoreScalarAtMat21(out int4x4 A, int I) {
+    A._21 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat22Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat22Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -122,14 +117,13 @@ int StoreScalarAtMat21(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 5
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat22(out int4x4 A, int I) {
-    return A._22 = I;
+void StoreScalarAtMat22(out int4x4 A, int I) {
+    A._22 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat23Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat23Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -143,14 +137,13 @@ int StoreScalarAtMat22(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat23(out int4x4 A, int I) {
-    return A._23 = I;
+void StoreScalarAtMat23(out int4x4 A, int I) {
+    A._23 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat24Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat24Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -164,14 +157,13 @@ int StoreScalarAtMat23(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 7
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat24(out int4x4 A, int I) {
-    return A._24 = I;
+void StoreScalarAtMat24(out int4x4 A, int I) {
+    A._24 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat31Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat31Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -185,14 +177,13 @@ int StoreScalarAtMat24(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 8
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat31(out int4x4 A, int I) {
-    return A._31 = I;
+void StoreScalarAtMat31(out int4x4 A, int I) {
+    A._31 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat32Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat32Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -206,14 +197,13 @@ int StoreScalarAtMat31(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 9
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat32(out int4x4 A, int I) {
-    return A._32 = I;
+void StoreScalarAtMat32(out int4x4 A, int I) {
+    A._32 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat33Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat33Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -227,14 +217,13 @@ int StoreScalarAtMat32(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 10
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat33(out int4x4 A, int I) {
-    return A._33 = I;
+void StoreScalarAtMat33(out int4x4 A, int I) {
+    A._33 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat34Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat34Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -248,14 +237,13 @@ int StoreScalarAtMat33(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 11
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat34(out int4x4 A, int I) {
-    return A._34 = I;
+void StoreScalarAtMat34(out int4x4 A, int I) {
+    A._34 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat41Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat41Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -269,14 +257,13 @@ int StoreScalarAtMat34(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 12
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat41(out int4x4 A, int I) {
-    return A._41 = I;
+void StoreScalarAtMat41(out int4x4 A, int I) {
+    A._41 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat42Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat42Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -290,14 +277,13 @@ int StoreScalarAtMat41(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 13
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat42(out int4x4 A, int I) {
-    return A._42 = I;
+void StoreScalarAtMat42(out int4x4 A, int I) {
+    A._42 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat43Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat43Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -311,14 +297,13 @@ int StoreScalarAtMat42(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 14
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat43(out int4x4 A, int I) {
-    return A._43 = I;
+void StoreScalarAtMat43(out int4x4 A, int I) {
+    A._43 = I;
 }
 
-// CHECK-LABEL: define hidden noundef i32 @_Z18StoreScalarAtMat44Ru11matrix_typeILm4ELm4EiEi(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat44Ru11matrix_typeILm4ELm4EiEi(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -332,11 +317,10 @@ int StoreScalarAtMat43(out int4x4 A, int I) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 15
 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i32> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret i32 [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-int StoreScalarAtMat44(out int4x4 A, int I) {
-    return A._44 = I;
+void StoreScalarAtMat44(out int4x4 A, int I) {
+    A._44 = I;
 }
 
 //.
diff --git a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
index 3c11f5fc29515..fb4fa267174b9 100644
--- a/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl
@@ -4,7 +4,7 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat00Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat00Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -17,14 +17,13 @@
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP1]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat00(out float4x4 A, float F) {
-    return A._m00 = F;
+void StoreScalarAtMat00(out float4x4 A, float F) {
+    A._m00 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat01Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat01Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -38,14 +37,13 @@ float StoreScalarAtMat00(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 1
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat01(out float4x4 A, float F) {
-    return A._m01 = F;
+void StoreScalarAtMat01(out float4x4 A, float F) {
+    A._m01 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat02Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat02Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -59,14 +57,13 @@ float StoreScalarAtMat01(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 2
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat02(out float4x4 A, float F) {
-    return A._m02 = F;
+void StoreScalarAtMat02(out float4x4 A, float F) {
+    A._m02 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat03Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat03Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -80,14 +77,13 @@ float StoreScalarAtMat02(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 3
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat03(out float4x4 A, float F) {
-    return A._m03 = F;
+void StoreScalarAtMat03(out float4x4 A, float F) {
+    A._m03 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat10Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat10Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -101,14 +97,13 @@ float StoreScalarAtMat03(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 4
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat10(out float4x4 A, float F) {
-    return A._m10 = F;
+void StoreScalarAtMat10(out float4x4 A, float F) {
+    A._m10 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat11Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat11Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -122,14 +117,13 @@ float StoreScalarAtMat10(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 5
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat11(out float4x4 A, float F) {
-    return A._m11 = F;
+void StoreScalarAtMat11(out float4x4 A, float F) {
+    A._m11 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat12Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat12Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -143,14 +137,13 @@ float StoreScalarAtMat11(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 6
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat12(out float4x4 A, float F) {
-    return A._m12 = F;
+void StoreScalarAtMat12(out float4x4 A, float F) {
+    A._m12 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat13Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat13Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -164,14 +157,13 @@ float StoreScalarAtMat12(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 7
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat13(out float4x4 A, float F) {
-    return A._m13 = F;
+void StoreScalarAtMat13(out float4x4 A, float F) {
+    A._m13 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat20Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat20Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -185,14 +177,13 @@ float StoreScalarAtMat13(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 8
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat20(out float4x4 A, float F) {
-    return A._m20 = F;
+void StoreScalarAtMat20(out float4x4 A, float F) {
+    A._m20 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat21Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat21Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -206,14 +197,13 @@ float StoreScalarAtMat20(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 9
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat21(out float4x4 A, float F) {
-    return A._m21 = F;
+void StoreScalarAtMat21(out float4x4 A, float F) {
+    A._m21 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat22Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat22Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -227,14 +217,13 @@ float StoreScalarAtMat21(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 10
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat22(out float4x4 A, float F) {
-    return A._m22 = F;
+void StoreScalarAtMat22(out float4x4 A, float F) {
+    A._m22 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat23Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat23Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -248,14 +237,13 @@ float StoreScalarAtMat22(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 11
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat23(out float4x4 A, float F) {
-    return A._m23 = F;
+void StoreScalarAtMat23(out float4x4 A, float F) {
+    A._m23 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat30Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat30Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -269,14 +257,13 @@ float StoreScalarAtMat23(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 12
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat30(out float4x4 A, float F) {
-    return A._m30 = F;
+void StoreScalarAtMat30(out float4x4 A, float F) {
+    A._m30 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat31Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat31Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -290,14 +277,13 @@ float StoreScalarAtMat30(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 13
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat31(out float4x4 A, float F) {
-    return A._m31 = F;
+void StoreScalarAtMat31(out float4x4 A, float F) {
+    A._m31 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat32Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat32Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -311,14 +297,13 @@ float StoreScalarAtMat31(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 14
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat32(out float4x4 A, float F) {
-    return A._m32 = F;
+void StoreScalarAtMat32(out float4x4 A, float F) {
+    A._m32 = F;
 }
 
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z18StoreScalarAtMat33Ru11matrix_typeILm4ELm4EfEf(
+// CHECK-LABEL: define hidden void @_Z18StoreScalarAtMat33Ru11matrix_typeILm4ELm4EfEf(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[A:%.*]], float noundef nofpclass(nan inf) [[F:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -332,11 +317,10 @@ float StoreScalarAtMat32(out float4x4 A, float F) {
 // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <16 x float>, ptr [[TMP1]], i32 0, i32 15
 // CHECK-NEXT:    store float [[TMP2]], ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x float> [[SPLAT_SPLAT]], i32 0
-// CHECK-NEXT:    ret float [[CAST_VTRUNC]]
+// CHECK-NEXT:    ret void
 //
-float StoreScalarAtMat33(out float4x4 A, float F) {
-    return A._m33 = F;
+void StoreScalarAtMat33(out float4x4 A, float F) {
+    A._m33 = F;
 }
 
 //.
diff --git a/clang/test/SemaHLSL/matrix-member-access-errors.hlsl b/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
index 1d1fc479b09d7..bba038651f210 100644
--- a/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
+++ b/clang/test/SemaHLSL/matrix-member-access-errors.hlsl
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -finclude-default-header -verify %s
 
+typedef vector<float, 5> float5;
+
 void foo() {
     float3x3 A;
     float r = A._m00;      // read is ok
@@ -29,4 +31,9 @@ void foo() {
 
     A._m12 = 3.14;           // write is OK
     A._m00_m00 = 1.xx;       // expected-error {{matrix is not assignable (contains duplicate components)}}
+
+    float4x4 B;
+    float5 vec5;
+    B._m00_m01_m02_m03_m10 = vec5;  // expected-error {{matrix swizzle length must be between 1 and 4 but is 5}}
+    float5 badVec5 = B._m00_m01_m02_m03_m10; // expected-error {{matrix swizzle length must be between 1 and 4 but is 5}}
 }



More information about the cfe-commits mailing list