[clang] [HLSL][Matrix] Allow flattening matrix types in FlattenAccessAndTypeLValue (PR #177708)
Deric C. via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 17 11:29:47 PST 2026
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/177708
>From 21287b87994c3dce5e1615c9bace88d819526324 Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Fri, 23 Jan 2026 15:13:32 -0800
Subject: [PATCH 1/6] Enable explicit cast of matrices to vectors
---
clang/lib/CodeGen/CGExpr.cpp | 21 +++++-
.../BasicFeatures/MatrixToVectorCast.hlsl | 70 +++++++++++++++++++
2 files changed, 89 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 76a3939cd28eb..5d595c2e3b1e5 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -7117,8 +7117,6 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
while (!WorkList.empty()) {
auto [LVal, T, IdxList] = WorkList.pop_back_val();
T = T.getCanonicalType().getUnqualifiedType();
- assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL");
-
if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) {
uint64_t Size = CAT->getZExtSize();
for (int64_t I = Size - 1; I > -1; I--) {
@@ -7192,6 +7190,25 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
Base.getBaseInfo(), TBAAAccessInfo());
AccessList.emplace_back(LV);
}
+ } else if (const auto *MT = dyn_cast<ConstantMatrixType>(T)) {
+ // Matrices are represented as flat arrays in memory, but has a vector
+ // value type. So we use ConvertMatrixAddress to convert the address from
+ // array to vector, and extract elements similar to the vector case above.
+ // The order in which we iterate over the elements is sequentially in
+ // memory; whether the matrix is in row- or column-major order does not
+ // matter.
+ llvm::Type *LLVMT = ConvertTypeForMem(T);
+ CharUnits Align = getContext().getTypeAlignInChars(T);
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT,
+ Align, "matrix.gep");
+ LValue Base = MakeAddrLValue(GEP, T);
+ Address MatAddr = MaybeConvertMatrixAddress(Base.getAddress(), *this);
+ for (unsigned I = 0, E = MT->getNumElementsFlattened(); I < E; I++) {
+ llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I);
+ LValue LV = LValue::MakeMatrixElt(MatAddr, Idx, MT->getElementType(),
+ Base.getBaseInfo(), TBAAAccessInfo());
+ AccessList.emplace_back(LV);
+ }
} else { // a scalar/builtin type
if (!IdxList.empty()) {
llvm::Type *LLVMT = ConvertTypeForMem(T);
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
new file mode 100644
index 0000000000000..06af2ebe72473
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
@@ -0,0 +1,70 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z2fnu11matrix_typeILm2ELm2EfE(
+// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x float], align 4
+// CHECK-NEXT: [[V:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [4 x float], align 4
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
+// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2
+// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
+// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[MATRIXEXT3]], i64 3
+// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[V]], align 16
+// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[V]], align 16
+// CHECK-NEXT: ret <4 x float> [[TMP10]]
+//
+float4 fn(float2x2 M) {
+ float4 V = (float4)M;
+ return V;
+}
+
+// CHECK-LABEL: define hidden noundef <3 x i32> @_Z3fn2u11matrix_typeILm3ELm1EiE(
+// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x i32], align 4
+// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16
+// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x i32], align 4
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <3 x i32>, align 16
+// CHECK-NEXT: store <3 x i32> [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <3 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[FLATCAST_TMP]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <3 x i32> [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[MATRIXEXT]], i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <3 x i32> [[TMP4]], i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x i32> [[TMP3]], i32 [[MATRIXEXT1]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <3 x i32> [[TMP6]], i32 2
+// CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i32> [[TMP5]], i32 [[MATRIXEXT2]], i64 2
+// CHECK-NEXT: store <3 x i32> [[TMP7]], ptr [[V]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = load <3 x i32>, ptr [[V]], align 16
+// CHECK-NEXT: ret <3 x i32> [[TMP8]]
+//
+int3 fn2(int3x1 M) {
+ int3 V = (int3)M;
+ return V;
+}
+
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// COL-CHECK: {{.*}}
+// ROW-CHECK: {{.*}}
>From 805aebe5ff6bd6b39f07c383d5531cc51759c25e Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Mon, 26 Jan 2026 10:17:01 -0800
Subject: [PATCH 2/6] Adjust index iteration for matrix memory layout
---
clang/lib/CodeGen/CGExpr.cpp | 18 ++++++++++++++----
.../BasicFeatures/MatrixToVectorCast.hlsl | 12 +++++-------
2 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 5d595c2e3b1e5..9a6629e4b6f9f 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -7194,17 +7194,27 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
// Matrices are represented as flat arrays in memory, but has a vector
// value type. So we use ConvertMatrixAddress to convert the address from
// array to vector, and extract elements similar to the vector case above.
- // The order in which we iterate over the elements is sequentially in
- // memory; whether the matrix is in row- or column-major order does not
- // matter.
+ // The order in which we iterate over the elements must respect the
+ // matrix memory layout, computing the proper index for each (row, col).
llvm::Type *LLVMT = ConvertTypeForMem(T);
CharUnits Align = getContext().getTypeAlignInChars(T);
Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT,
Align, "matrix.gep");
LValue Base = MakeAddrLValue(GEP, T);
Address MatAddr = MaybeConvertMatrixAddress(Base.getAddress(), *this);
+ unsigned NumRows = MT->getNumRows();
+ unsigned NumCols = MT->getNumColumns();
+ bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
+ LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+ llvm::MatrixBuilder MB(Builder);
for (unsigned I = 0, E = MT->getNumElementsFlattened(); I < E; I++) {
- llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I);
+ // Compute (row, col) from linear index assuming row-major iteration.
+ unsigned Row = I / NumCols;
+ unsigned Col = I % NumCols;
+ llvm::Value *RowIdx = llvm::ConstantInt::get(IdxTy, Row);
+ llvm::Value *ColIdx = llvm::ConstantInt::get(IdxTy, Col);
+ llvm::Value *Idx =
+ MB.CreateIndex(RowIdx, ColIdx, NumRows, NumCols, IsMatrixRowMajor);
LValue LV = LValue::MakeMatrixElt(MatAddr, Idx, MT->getElementType(),
Base.getBaseInfo(), TBAAAccessInfo());
AccessList.emplace_back(LV);
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
index 06af2ebe72473..0485fb73197ad 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
@@ -18,10 +17,12 @@
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
-// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+// COL-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
+// ROW-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
-// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
+// COL-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
+// ROW-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2
// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
// CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
@@ -36,7 +37,7 @@ float4 fn(float2x2 M) {
}
// CHECK-LABEL: define hidden noundef <3 x i32> @_Z3fn2u11matrix_typeILm3ELm1EiE(
-// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x i32], align 4
// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16
@@ -65,6 +66,3 @@ int3 fn2(int3x1 M) {
return V;
}
-//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-// COL-CHECK: {{.*}}
-// ROW-CHECK: {{.*}}
>From 98e55a9ea34b9387600ca6d5e650d81099f6dd9c Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 27 Jan 2026 10:48:18 -0800
Subject: [PATCH 3/6] Move tests to existing VectorElementwiseCast test file
---
.../BasicFeatures/MatrixToVectorCast.hlsl | 68 -------------------
.../BasicFeatures/VectorElementwiseCast.hlsl | 61 ++++++++++++++++-
2 files changed, 60 insertions(+), 69 deletions(-)
delete mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
deleted file mode 100644
index 0485fb73197ad..0000000000000
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl
+++ /dev/null
@@ -1,68 +0,0 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
-
-// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z2fnu11matrix_typeILm2ELm2EfE(
-// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x float], align 4
-// CHECK-NEXT: [[V:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [4 x float], align 4
-// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: store <4 x float> [[M]], ptr [[M_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 4
-// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4
-// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
-// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0
-// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
-// COL-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
-// ROW-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
-// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1
-// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
-// COL-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
-// ROW-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
-// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2
-// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
-// CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
-// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[MATRIXEXT3]], i64 3
-// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[V]], align 16
-// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[V]], align 16
-// CHECK-NEXT: ret <4 x float> [[TMP10]]
-//
-float4 fn(float2x2 M) {
- float4 V = (float4)M;
- return V;
-}
-
-// CHECK-LABEL: define hidden noundef <3 x i32> @_Z3fn2u11matrix_typeILm3ELm1EiE(
-// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x i32], align 4
-// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16
-// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x i32], align 4
-// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <3 x i32>, align 16
-// CHECK-NEXT: store <3 x i32> [[M]], ptr [[M_ADDR]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4
-// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4
-// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <3 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[FLATCAST_TMP]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
-// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <3 x i32> [[TMP2]], i32 0
-// CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[MATRIXEXT]], i64 0
-// CHECK-NEXT: [[TMP4:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
-// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <3 x i32> [[TMP4]], i32 1
-// CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x i32> [[TMP3]], i32 [[MATRIXEXT1]], i64 1
-// CHECK-NEXT: [[TMP6:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
-// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <3 x i32> [[TMP6]], i32 2
-// CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i32> [[TMP5]], i32 [[MATRIXEXT2]], i64 2
-// CHECK-NEXT: store <3 x i32> [[TMP7]], ptr [[V]], align 16
-// CHECK-NEXT: [[TMP8:%.*]] = load <3 x i32>, ptr [[V]], align 16
-// CHECK-NEXT: ret <3 x i32> [[TMP8]]
-//
-int3 fn2(int3x1 M) {
- int3 V = (int3)M;
- return V;
-}
-
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 26aa41aaf4626..c11c8498ada45 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
// vector flat cast from array
// CHECK-LABEL: define void {{.*}}call2
@@ -121,3 +122,61 @@ struct Derived : BFields {
export void call6(Derived D) {
int4 A = (int4)D;
}
+
+// vector flat cast from matrix of same size (float)
+// CHECK-LABEL: call7
+// CHECK: [[M_ADDR:%.*]] = alloca [4 x float], align 4
+// CHECK-NEXT: [[V:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [4 x float], align 4
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> %M, ptr [[M_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// COL-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
+// ROW-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// COL-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
+// ROW-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
+// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2
+// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
+// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[MATRIXEXT3]], i64 3
+// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[V]], align 16
+// CHECK-NEXT: ret void
+export void call7(float2x2 M) {
+ float4 V = (float4)M;
+}
+
+// vector flat cast from matrix of same size (int)
+// CHECK-LABEL: call8
+// CHECK: [[M_ADDR:%.*]] = alloca [3 x i32], align 4
+// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16
+// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x i32], align 4
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <3 x i32>, align 16
+// CHECK-NEXT: store <3 x i32> %M, ptr [[M_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <3 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[FLATCAST_TMP]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <3 x i32> [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[MATRIXEXT]], i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <3 x i32> [[TMP4]], i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x i32> [[TMP3]], i32 [[MATRIXEXT1]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <3 x i32> [[TMP6]], i32 2
+// CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i32> [[TMP5]], i32 [[MATRIXEXT2]], i64 2
+// CHECK-NEXT: store <3 x i32> [[TMP7]], ptr [[V]], align 16
+// CHECK-NEXT: ret void
+export void call8(int3x1 M) {
+ int3 V = (int3)M;
+}
+
>From cd13943548d84e49290501a03165113fcfbbf6a8 Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 27 Jan 2026 10:53:00 -0800
Subject: [PATCH 4/6] Fixup code comments regarding matrix element indexing
---
clang/lib/CodeGen/CGExpr.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9a6629e4b6f9f..fcd5a30d6ac19 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -7194,8 +7194,8 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
// Matrices are represented as flat arrays in memory, but has a vector
// value type. So we use ConvertMatrixAddress to convert the address from
// array to vector, and extract elements similar to the vector case above.
- // The order in which we iterate over the elements must respect the
- // matrix memory layout, computing the proper index for each (row, col).
+ // The matrix elements are iterated over in row-major order regardless of
+ // the memory layout of the matrix.
llvm::Type *LLVMT = ConvertTypeForMem(T);
CharUnits Align = getContext().getTypeAlignInChars(T);
Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT,
@@ -7208,7 +7208,6 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
LangOptions::MatrixMemoryLayout::MatrixRowMajor;
llvm::MatrixBuilder MB(Builder);
for (unsigned I = 0, E = MT->getNumElementsFlattened(); I < E; I++) {
- // Compute (row, col) from linear index assuming row-major iteration.
unsigned Row = I / NumCols;
unsigned Col = I % NumCols;
llvm::Value *RowIdx = llvm::ConstantInt::get(IdxTy, Row);
>From 77fe68ddfddb0b3dae34ca900c00df8a5ace7d43 Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 27 Jan 2026 14:16:41 -0800
Subject: [PATCH 5/6] Use two-dimensional for-loop instead of a single for-loop
---
clang/lib/CodeGen/CGExpr.cpp | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index fcd5a30d6ac19..490377c04b034 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -7207,16 +7207,17 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
LangOptions::MatrixMemoryLayout::MatrixRowMajor;
llvm::MatrixBuilder MB(Builder);
- for (unsigned I = 0, E = MT->getNumElementsFlattened(); I < E; I++) {
- unsigned Row = I / NumCols;
- unsigned Col = I % NumCols;
- llvm::Value *RowIdx = llvm::ConstantInt::get(IdxTy, Row);
- llvm::Value *ColIdx = llvm::ConstantInt::get(IdxTy, Col);
- llvm::Value *Idx =
- MB.CreateIndex(RowIdx, ColIdx, NumRows, NumCols, IsMatrixRowMajor);
- LValue LV = LValue::MakeMatrixElt(MatAddr, Idx, MT->getElementType(),
- Base.getBaseInfo(), TBAAAccessInfo());
- AccessList.emplace_back(LV);
+ for (unsigned Row = 0; Row < MT->getNumRows(); Row++) {
+ for (unsigned Col = 0; Col < MT->getNumColumns(); Col++) {
+ llvm::Value *RowIdx = llvm::ConstantInt::get(IdxTy, Row);
+ llvm::Value *ColIdx = llvm::ConstantInt::get(IdxTy, Col);
+ llvm::Value *Idx = MB.CreateIndex(RowIdx, ColIdx, NumRows, NumCols,
+ IsMatrixRowMajor);
+ LValue LV =
+ LValue::MakeMatrixElt(MatAddr, Idx, MT->getElementType(),
+ Base.getBaseInfo(), TBAAAccessInfo());
+ AccessList.emplace_back(LV);
+ }
}
} else { // a scalar/builtin type
if (!IdxList.empty()) {
>From 4a7d85e9a8d62a9e198b225d47dd1b422feca5a2 Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 17 Feb 2026 11:29:20 -0800
Subject: [PATCH 6/6] Update matrix allocas in test for array of vectors
representation
---
.../BasicFeatures/VectorElementwiseCast.hlsl | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index c11c8498ada45..5242efd77b3b4 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -125,9 +125,9 @@ export void call6(Derived D) {
// vector flat cast from matrix of same size (float)
// CHECK-LABEL: call7
-// CHECK: [[M_ADDR:%.*]] = alloca [4 x float], align 4
+// CHECK: [[M_ADDR:%.*]] = alloca [2 x <2 x float>], align 4
// CHECK-NEXT: [[V:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [4 x float], align 4
+// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x <2 x float>], align 4
// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 16
// CHECK-NEXT: store <4 x float> %M, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 4
@@ -156,9 +156,11 @@ export void call7(float2x2 M) {
// vector flat cast from matrix of same size (int)
// CHECK-LABEL: call8
-// CHECK: [[M_ADDR:%.*]] = alloca [3 x i32], align 4
+// COL-CHECK: [[M_ADDR:%.*]] = alloca [1 x <3 x i32>], align 4
+// ROW-CHECK: [[M_ADDR:%.*]] = alloca [3 x <1 x i32>], align 4
// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16
-// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x i32], align 4
+// COL-CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [1 x <3 x i32>], align 4
+// ROW-CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x <1 x i32>], align 4
// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <3 x i32>, align 16
// CHECK-NEXT: store <3 x i32> %M, ptr [[M_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4
More information about the cfe-commits mailing list