[clang] [Matrix][HLSL] Allow memory layout to change via flags (PR #181866)
Farzon Lotfi via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 17 09:04:56 PST 2026
https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/181866
fixes #181859
This also fixes an issue introduced in
https://github.com/llvm/llvm-project/pull/179861
where the array-of-vectors memory layout was always emitted in row-major order (as DXC would define it), regardless of the selected matrix memory layout.
From 5a8d01e5a70c07742ffc2e28faed26fd66a21056 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 17 Feb 2026 11:57:15 -0500
Subject: [PATCH] [Matrix][HLSL] Allow memory layout to change via flags
fixes #181859
This also fixes an issue introduced in
https://github.com/llvm/llvm-project/pull/179861
where the array-of-vectors memory layout was always emitted in row-major
order (as DXC would define it), regardless of the selected matrix memory
layout.
---
clang/lib/CodeGen/CodeGenTypes.cpp | 13 +++-
.../BasicFeatures/MatrixElementTypeCast.hlsl | 24 +++----
.../MatrixExplicitTruncation.hlsl | 10 +--
.../MatrixImplicitTruncation.hlsl | 10 +--
.../MatrixSingleSubscriptConstSwizzle.hlsl | 4 +-
.../MatrixSingleSubscriptDynamicSwizzle.hlsl | 2 +-
.../MatrixSingleSubscriptGetter.hlsl | 10 +--
.../BasicFeatures/MatrixSplat.hlsl | 4 +-
clang/test/CodeGenHLSL/basic_types.hlsl | 33 ---------
clang/test/CodeGenHLSL/matrix_types.hlsl | 70 +++++++++++++++++++
10 files changed, 112 insertions(+), 68 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/matrix_types.hlsl
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index fd7a8929a9be9..f54921434986c 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -108,9 +108,16 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
if (Context.getLangOpts().HLSL) {
if (T->isConstantMatrixBoolType())
IRElemTy = ConvertTypeForMem(Context.BoolTy);
- llvm::Type *VecTy =
- llvm::FixedVectorType::get(IRElemTy, MT->getNumColumns());
- return llvm::ArrayType::get(VecTy, MT->getNumRows());
+
+ unsigned NumRows = MT->getNumRows();
+ unsigned NumCols = MT->getNumColumns();
+ bool IsRowMajor =
+ CGM.getContext().getLangOpts().getDefaultMatrixMemoryLayout() ==
+ LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+ unsigned VecLen = IsRowMajor ? NumCols : NumRows;
+ unsigned ArrayLen = IsRowMajor ? NumRows : NumCols;
+ llvm::Type *VecTy = llvm::FixedVectorType::get(IRElemTy, VecLen);
+ return llvm::ArrayType::get(VecTy, ArrayLen);
}
return llvm::ArrayType::get(IRElemTy, MT->getNumElementsFlattened());
}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
index f48edc19b86f7..b46d1efec1e1a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -5,8 +5,8 @@
// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast0u11matrix_typeILm3ELm2EfE(
// CHECK-SAME: <6 x float> noundef nofpclass(nan inf) [[F32:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca [3 x <2 x float>], align 4
-// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca [2 x <3 x float>], align 4
+// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <6 x float> [[F32]], ptr [[F32_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <6 x float>, ptr [[F32_ADDR]], align 4
// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x float> [[TMP0]] to <6 x i32>
@@ -22,8 +22,8 @@ int3x2 elementwise_type_cast0(float3x2 f32) {
// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast1u11matrix_typeILm3ELm2EsE(
// CHECK-SAME: <6 x i16> noundef [[I16_32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I16_32_ADDR:%.*]] = alloca [3 x <2 x i16>], align 2
-// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT: [[I16_32_ADDR:%.*]] = alloca [2 x <3 x i16>], align 2
+// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <6 x i16> [[I16_32]], ptr [[I16_32_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load <6 x i16>, ptr [[I16_32_ADDR]], align 2
// CHECK-NEXT: [[CONV:%.*]] = sext <6 x i16> [[TMP0]] to <6 x i32>
@@ -39,8 +39,8 @@ int3x2 elementwise_type_cast1(int16_t3x2 i16_32) {
// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast2u11matrix_typeILm3ELm2ElE(
// CHECK-SAME: <6 x i64> noundef [[I64_32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[I64_32_ADDR:%.*]] = alloca [3 x <2 x i64>], align 8
-// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT: [[I64_32_ADDR:%.*]] = alloca [2 x <3 x i64>], align 8
+// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <6 x i64> [[I64_32]], ptr [[I64_32_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[I64_32_ADDR]], align 8
// CHECK-NEXT: [[CONV:%.*]] = trunc <6 x i64> [[TMP0]] to <6 x i32>
@@ -56,8 +56,8 @@ int3x2 elementwise_type_cast2(int64_t3x2 i64_32) {
// CHECK-LABEL: define hidden noundef <6 x i16> @_Z22elementwise_type_cast3u11matrix_typeILm2ELm3EDhE(
// CHECK-SAME: <6 x half> noundef nofpclass(nan inf) [[H23:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[H23_ADDR:%.*]] = alloca [2 x <3 x half>], align 2
-// CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i16>], align 2
+// CHECK-NEXT: [[H23_ADDR:%.*]] = alloca [3 x <2 x half>], align 2
+// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i16>], align 2
// CHECK-NEXT: store <6 x half> [[H23]], ptr [[H23_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load <6 x half>, ptr [[H23_ADDR]], align 2
// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x half> [[TMP0]] to <6 x i16>
@@ -73,8 +73,8 @@ int16_t2x3 elementwise_type_cast3(half2x3 h23) {
// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast4u11matrix_typeILm3ELm2EdE(
// CHECK-SAME: <6 x double> noundef nofpclass(nan inf) [[D32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[D32_ADDR:%.*]] = alloca [3 x <2 x double>], align 8
-// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT: [[D32_ADDR:%.*]] = alloca [2 x <3 x double>], align 8
+// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <6 x double> [[D32]], ptr [[D32_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load <6 x double>, ptr [[D32_ADDR]], align 8
// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x double> [[TMP0]] to <6 x i32>
@@ -91,7 +91,7 @@ int3x2 elementwise_type_cast4(double3x2 d32) {
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A:%.*]] = alloca [2 x [1 x i32]], align 4
-// CHECK-NEXT: [[B:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT: [[B:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [2 x [1 x i32]], align 4
// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @__const._Z5call2v.A, i32 8, i1 false)
@@ -120,7 +120,7 @@ struct S {
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
-// CHECK-NEXT: [[A:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT: [[A:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [[STRUCT_S]], align 1
// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z5call3v.s, i32 8, i1 false)
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
index 56f816806d63f..fb32478f2cac9 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
@@ -5,7 +5,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -22,7 +22,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
@@ -56,7 +56,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
@@ -73,7 +73,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
@@ -107,7 +107,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
index b58f567eb51d3..d8738c8948f0f 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
@@ -5,7 +5,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -22,7 +22,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
@@ -56,7 +56,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
@@ -73,7 +73,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
@@ -107,7 +107,7 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl
index 2b950d8a51a38..57e4d0d6c459f 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl
@@ -108,7 +108,7 @@ void setVectorOnMatrixSwizzle(out int2x3 M, int3 V) {
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[MINDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: [[MINDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4
@@ -139,7 +139,7 @@ void setMatrixFromMatrix(out int2x3 M, int2x3 N, int MIndex) {
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[NINDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: [[NINDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
index 7190b6e1148a5..97921c785dc9d 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
@@ -115,7 +115,7 @@ int3 getMatrixSwizzle2x3(out int2x3 M, int index) {
// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
index efa9381b515af..735884911fc06 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
@@ -31,7 +31,7 @@ float4 getFloatVecMatrixDynamic(float4x4 M, int index) {
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z27getFloatScalarMatrixDynamicu11matrix_typeILm2ELm1EfEi(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
@@ -50,7 +50,7 @@ float getFloatScalarMatrixDynamic(float2x1 M, int index) {
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z28getFloatScalarMatrixConstantu11matrix_typeILm2ELm1EfE(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
// CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
@@ -65,7 +65,7 @@ float getFloatScalarMatrixConstant(float2x1 M) {
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z29getFloatScalarMatrixConstant2u11matrix_typeILm2ELm1EfE(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
// CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
@@ -207,7 +207,7 @@ int4 AddIntMatrixConstant(int4x4 M) {
// CHECK-LABEL: define hidden noundef <3 x i1> @_Z23getBoolVecMatrixDynamicu11matrix_typeILm2ELm3EbEi(
// CHECK-SAME: <6 x i1> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP0:%.*]] = zext <6 x i1> [[M]] to <6 x i32>
// CHECK-NEXT: store <6 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
@@ -255,7 +255,7 @@ bool4 getBoolVecMatrixConstant(bool4x4 M) {
// CHECK-LABEL: define hidden noundef i1 @_Z27getBoolScalarMatrixConstantu11matrix_typeILm3ELm1EbE(
// CHECK-SAME: <3 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x <1 x i32>], align 4
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <3 x i32>], align 4
// CHECK-NEXT: [[TMP0:%.*]] = zext <3 x i1> [[M]] to <3 x i32>
// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
index 768c1b8e02bea..5edb8a3dd4690 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
@@ -120,7 +120,7 @@ void ExplicitIntToBoolCastThenSplat(int3 Value) {
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT: [[M:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT: [[M:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: store <2 x float> [[VALUE]], ptr [[VALUE_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[VALUE_ADDR]], align 8
// CHECK-NEXT: [[TOBOOL:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <2 x float> [[TMP0]], zeroinitializer
@@ -139,7 +139,7 @@ void ExplicitFloatToBoolCastThenSplat(float2 Value) {
// CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[M:%.*]] = alloca [3 x <2 x float>], align 4
+// CHECK-NEXT: [[M:%.*]] = alloca [2 x <3 x float>], align 4
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32
// CHECK-NEXT: store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
diff --git a/clang/test/CodeGenHLSL/basic_types.hlsl b/clang/test/CodeGenHLSL/basic_types.hlsl
index 0aaf7a1b77797..8836126934957 100644
--- a/clang/test/CodeGenHLSL/basic_types.hlsl
+++ b/clang/test/CodeGenHLSL/basic_types.hlsl
@@ -38,22 +38,6 @@
// CHECK: @double2_Val = external hidden addrspace(2) global <2 x double>, align 16
// CHECK: @double3_Val = external hidden addrspace(2) global <3 x double>, align 32
// CHECK: @double4_Val = external hidden addrspace(2) global <4 x double>, align 32
-// CHECK: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 x i32>], align 4
-// CHECK: @bool1x2_Val = external hidden addrspace(2) global [1 x <2 x i32>], align 4
-// CHECK: @bool1x3_Val = external hidden addrspace(2) global [1 x <3 x i32>], align 4
-// CHECK: @bool1x4_Val = external hidden addrspace(2) global [1 x <4 x i32>], align 4
-// CHECK: @bool2x1_Val = external hidden addrspace(2) global [2 x <1 x i32>], align 4
-// CHECK: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 x i32>], align 4
-// CHECK: @bool2x3_Val = external hidden addrspace(2) global [2 x <3 x i32>], align 4
-// CHECK: @bool2x4_Val = external hidden addrspace(2) global [2 x <4 x i32>], align 4
-// CHECK: @bool3x1_Val = external hidden addrspace(2) global [3 x <1 x i32>], align 4
-// CHECK: @bool3x2_Val = external hidden addrspace(2) global [3 x <2 x i32>], align 4
-// CHECK: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 x i32>], align 4
-// CHECK: @bool3x4_Val = external hidden addrspace(2) global [3 x <4 x i32>], align 4
-// CHECK: @bool4x1_Val = external hidden addrspace(2) global [4 x <1 x i32>], align 4
-// CHECK: @bool4x2_Val = external hidden addrspace(2) global [4 x <2 x i32>], align 4
-// CHECK: @bool4x3_Val = external hidden addrspace(2) global [4 x <3 x i32>], align 4
-// CHECK: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 x i32>], align 4
#ifdef NAMESPACED
#define TYPE_DECL(T) hlsl::T T##_Val
@@ -109,20 +93,3 @@ TYPE_DECL( float4 );
TYPE_DECL( double2 );
TYPE_DECL( double3 );
TYPE_DECL( double4 );
-
-TYPE_DECL( bool1x1 );
-TYPE_DECL( bool1x2 );
-TYPE_DECL( bool1x3 );
-TYPE_DECL( bool1x4 );
-TYPE_DECL( bool2x1 );
-TYPE_DECL( bool2x2 );
-TYPE_DECL( bool2x3 );
-TYPE_DECL( bool2x4 );
-TYPE_DECL( bool3x1 );
-TYPE_DECL( bool3x2 );
-TYPE_DECL( bool3x3 );
-TYPE_DECL( bool3x4 );
-TYPE_DECL( bool4x1 );
-TYPE_DECL( bool4x2 );
-TYPE_DECL( bool4x3 );
-TYPE_DECL( bool4x4 );
diff --git a/clang/test/CodeGenHLSL/matrix_types.hlsl b/clang/test/CodeGenHLSL/matrix_types.hlsl
new file mode 100644
index 0000000000000..1c2f9cd316543
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix_types.hlsl
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
+// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - | FileCheck %s --check-prefix=CHECK-ROW-MAJOR
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
+// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - -DNAMESPACED| FileCheck %s --check-prefix=CHECK-ROW-MAJOR
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
+// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - | FileCheck %s --check-prefix=CHECK-COL-MAJOR
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
+// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - -DNAMESPACED| FileCheck %s --check-prefix=CHECK-COL-MAJOR
+
+// CHECK-ROW-MAJOR: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool1x2_Val = external hidden addrspace(2) global [1 x <2 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool1x3_Val = external hidden addrspace(2) global [1 x <3 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool1x4_Val = external hidden addrspace(2) global [1 x <4 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x1_Val = external hidden addrspace(2) global [2 x <1 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x3_Val = external hidden addrspace(2) global [2 x <3 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x4_Val = external hidden addrspace(2) global [2 x <4 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x1_Val = external hidden addrspace(2) global [3 x <1 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x2_Val = external hidden addrspace(2) global [3 x <2 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x4_Val = external hidden addrspace(2) global [3 x <4 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x1_Val = external hidden addrspace(2) global [4 x <1 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x2_Val = external hidden addrspace(2) global [4 x <2 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x3_Val = external hidden addrspace(2) global [4 x <3 x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 x i32>], align 4
+
+// CHECK-COL-MAJOR: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 x i32>], align 4
+// CHECK-COL-MAJOR: @bool1x2_Val = external hidden addrspace(2) global [2 x <1 x i32>], align 4
+// CHECK-COL-MAJOR: @bool1x3_Val = external hidden addrspace(2) global [3 x <1 x i32>], align 4
+// CHECK-COL-MAJOR: @bool1x4_Val = external hidden addrspace(2) global [4 x <1 x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x1_Val = external hidden addrspace(2) global [1 x <2 x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x3_Val = external hidden addrspace(2) global [3 x <2 x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x4_Val = external hidden addrspace(2) global [4 x <2 x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x1_Val = external hidden addrspace(2) global [1 x <3 x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x2_Val = external hidden addrspace(2) global [2 x <3 x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x4_Val = external hidden addrspace(2) global [4 x <3 x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x1_Val = external hidden addrspace(2) global [1 x <4 x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x2_Val = external hidden addrspace(2) global [2 x <4 x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x3_Val = external hidden addrspace(2) global [3 x <4 x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 x i32>], align 4
+
+#ifdef NAMESPACED
+#define TYPE_DECL(T) hlsl::T T##_Val
+#else
+#define TYPE_DECL(T) T T##_Val
+#endif
+
+TYPE_DECL( bool1x1 );
+TYPE_DECL( bool1x2 );
+TYPE_DECL( bool1x3 );
+TYPE_DECL( bool1x4 );
+TYPE_DECL( bool2x1 );
+TYPE_DECL( bool2x2 );
+TYPE_DECL( bool2x3 );
+TYPE_DECL( bool2x4 );
+TYPE_DECL( bool3x1 );
+TYPE_DECL( bool3x2 );
+TYPE_DECL( bool3x3 );
+TYPE_DECL( bool3x4 );
+TYPE_DECL( bool4x1 );
+TYPE_DECL( bool4x2 );
+TYPE_DECL( bool4x3 );
+TYPE_DECL( bool4x4 );
More information about the cfe-commits
mailing list