[clang] [llvm] [HLSL][Matrix] Change MatrixSubscriptExpr flattened index to row major (PR #165666)
Farzon Lotfi via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 30 00:35:14 PDT 2025
https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/165666
fixes #165663
After experimenting with clang vs DXC via the offload test suite. Noticed clang was doing column major init:
```
1,3,5
2,4,6
[row][col] -->
[0][0] = 1
[0][1] = 3
[0][2] = 5
[1][0] = 2
[1][1] = 4
[1][2] = 6
```
While DXC was doing row major indexing
```
1,2,3,
4,5,6
[row][col] -->
[0][0] = 1
[0][1] = 2
[0][2] = 3
[1][0] = 4
[1][1] = 5
[1][2] = 6
```
Reading the docs See https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-per-component-math#matrix-ordering
```
Row-major and column-major packing order has no influence on the packing order of constructors
(which always follows row-major ordering).
```
So changed up the flatten index and cofirmed it did not change for c\c++ via testing.
>From d55784cd8744ecc0506d656ad81f8bf051bf5873 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 30 Oct 2025 00:21:49 -0700
Subject: [PATCH] [HLSL][Matrix] Change MatrixSubscriptExpr flattened index to
row major
fixes #165663
After experimenting with clang vs DXC via the offload test suite.
Noticed clang was doing column major init:
```
1,3,5
2,4,6
[row][col] -->
[0][0] = 1
[0][1] = 3
[0][2] = 5
[1][0] = 2
[1][1] = 4
[1][2] = 6
```
While DXC was doing row major indexing
```
1,2,3,
4,5,6
[row][col] -->
[0][0] = 1
[0][1] = 2
[0][2] = 3
[1][0] = 4
[1][1] = 5
[1][2] = 6
```
Reading the docs See https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-per-component-math#matrix-ordering
```
Row-major and column-major packing order has no influence on the packing order of constructors
(which always follows row-major ordering).
```
So changed up the flatten index and cofirmed it did not change for c\c++
via testing.
---
clang/lib/CodeGen/CGExpr.cpp | 17 ++++++---
clang/lib/CodeGen/CGExprScalar.cpp | 12 +++++--
clang/test/CodeGen/matrix-type-indexing.c | 35 +++++++++++++++++++
.../test/CodeGenCXX/matrix-type-indexing.cpp | 35 +++++++++++++++++++
.../BasicFeatures/matrix-type-indexing.hlsl | 30 ++++++++++++++++
llvm/include/llvm/IR/MatrixBuilder.h | 17 +++++++--
6 files changed, 137 insertions(+), 9 deletions(-)
create mode 100644 clang/test/CodeGen/matrix-type-indexing.c
create mode 100644 clang/test/CodeGenCXX/matrix-type-indexing.cpp
create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 01f2161f27555..208eb98972b23 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4824,11 +4824,18 @@ LValue CodeGenFunction::EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E) {
llvm::Value *RowIdx = EmitMatrixIndexExpr(E->getRowIdx());
llvm::Value *ColIdx = EmitMatrixIndexExpr(E->getColumnIdx());
- llvm::Value *NumRows = Builder.getIntN(
- RowIdx->getType()->getScalarSizeInBits(),
- E->getBase()->getType()->castAs<ConstantMatrixType>()->getNumRows());
- llvm::Value *FinalIdx =
- Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx);
+ llvm::Value *FinalIdx;
+ if (getLangOpts().HLSL) {
+ llvm::Value *NumCols = Builder.getIntN(
+ RowIdx->getType()->getScalarSizeInBits(),
+ E->getBase()->getType()->castAs<ConstantMatrixType>()->getNumColumns());
+ FinalIdx = Builder.CreateAdd(Builder.CreateMul(RowIdx, NumCols), ColIdx);
+ } else {
+ llvm::Value *NumRows = Builder.getIntN(
+ RowIdx->getType()->getScalarSizeInBits(),
+ E->getBase()->getType()->castAs<ConstantMatrixType>()->getNumRows());
+ FinalIdx = Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx);
+ }
return LValue::MakeMatrixElt(
MaybeConvertMatrixAddress(Base.getAddress(), *this), FinalIdx,
E->getBase()->getType(), Base.getBaseInfo(), TBAAAccessInfo());
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 714192db1b15c..5220621fe7a70 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2118,9 +2118,17 @@ Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
Value *ColumnIdx = CGF.EmitMatrixIndexExpr(E->getColumnIdx());
const auto *MatrixTy = E->getBase()->getType()->castAs<ConstantMatrixType>();
- unsigned NumRows = MatrixTy->getNumRows();
+
llvm::MatrixBuilder MB(Builder);
- Value *Idx = MB.CreateIndex(RowIdx, ColumnIdx, NumRows);
+ Value *Idx;
+ if (CGF.getLangOpts().HLSL) {
+ unsigned NumCols = MatrixTy->getNumColumns();
+ Idx = MB.createRowMajorIndex(RowIdx, ColumnIdx, NumCols);
+ } else {
+ unsigned NumRows = MatrixTy->getNumRows();
+ Idx = MB.createColumnMajorIndex(RowIdx, ColumnIdx, NumRows);
+ }
+
if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0)
MB.CreateIndexAssumption(Idx, MatrixTy->getNumElementsFlattened());
diff --git a/clang/test/CodeGen/matrix-type-indexing.c b/clang/test/CodeGen/matrix-type-indexing.c
new file mode 100644
index 0000000000000..19b9f02f5b0a4
--- /dev/null
+++ b/clang/test/CodeGen/matrix-type-indexing.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+ typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
+ float Out[6];
+
+void one_index_test(int index, fx2x3_t M) {
+ // CHECK-LABEL: one_index_test
+ // CHECK: %row = alloca i32, align 4
+ // CHECK: %col = alloca i32, align 4
+ // CHECK: [[row_load:%.*]] = load i32, ptr %row, align 4
+ // CHECK-NEXT: [[row_load_zext:%.*]] = zext i32 [[row_load]] to i64
+ // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col, align 4
+ // CHECK-NEXT: [[col_load_zext:%.*]] = zext i32 [[col_load]] to i64
+ // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_zext]], 2
+ // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_zext]]
+ // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4
+ // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]]
+ const unsigned int COLS = 3;
+ unsigned int row = index / COLS;
+ unsigned int col = index % COLS;
+ Out[index] = M[row][col];
+}
+
+float two_index_test(int row, int col, fx2x3_t M) {
+ // CHECK-LABEL: two_index_test
+ // CHECK: [[row_load:%.*]] = load i32, ptr %row.addr, align 4
+ // CHECK-NEXT: [[row_load_sext:%.*]] = sext i32 [[row_load]] to i64
+ // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col.addr, align 4
+ // CHECK-NEXT: [[col_load_sext:%.*]] = sext i32 [[col_load]] to i64
+ // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_sext]], 2
+ // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_sext]]
+ // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4
+ // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]]
+ return M[row][col];
+}
diff --git a/clang/test/CodeGenCXX/matrix-type-indexing.cpp b/clang/test/CodeGenCXX/matrix-type-indexing.cpp
new file mode 100644
index 0000000000000..b0ed5dc1d717e
--- /dev/null
+++ b/clang/test/CodeGenCXX/matrix-type-indexing.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - -std=c++11 | FileCheck %s
+
+ typedef float fx2x3_t __attribute__((matrix_type(2, 3)));
+ float Out[6];
+
+void one_index_test(int index, fx2x3_t M) {
+ // CHECK-LABEL: one_index_test
+ // CHECK: %row = alloca i32, align 4
+ // CHECK: %col = alloca i32, align 4
+ // CHECK: [[row_load:%.*]] = load i32, ptr %row, align 4
+ // CHECK-NEXT: [[row_load_zext:%.*]] = zext i32 [[row_load]] to i64
+ // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col, align 4
+ // CHECK-NEXT: [[col_load_zext:%.*]] = zext i32 [[col_load]] to i64
+ // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_zext]], 2
+ // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_zext]]
+ // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4
+ // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]]
+ const unsigned int COLS = 3;
+ unsigned int row = index / COLS;
+ unsigned int col = index % COLS;
+ Out[index] = M[row][col];
+}
+
+float two_index_test(int row, int col, fx2x3_t M) {
+ // CHECK-LABEL: two_index_test
+ // CHECK: [[row_load:%.*]] = load i32, ptr %row.addr, align 4
+ // CHECK-NEXT: [[row_load_sext:%.*]] = sext i32 [[row_load]] to i64
+ // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col.addr, align 4
+ // CHECK-NEXT: [[col_load_sext:%.*]] = sext i32 [[col_load]] to i64
+ // CHECK-NEXT: [[col_offset:%.*]] = mul i64 [[col_load_sext]], 2
+ // CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load_sext]]
+ // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr %M.addr, align 4
+ // CHECK-NEXT: %matrixext = extractelement <6 x float> [[matrix_as_vec]], i64 [[col_major_index]]
+ return M[row][col];
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl
new file mode 100644
index 0000000000000..ff63b23311e77
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/matrix-type-indexing.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// NOTE: This test is only to confirm we can do codgen with the matrix alias.
+
+RWBuffer<int> Out : register(u1);
+
+void one_index_test(int index, half2x3 M) {
+ // CHECK-LABEL: one_index_test
+ // CHECK: %row = alloca i32, align 4
+ // CHECK: %col = alloca i32, align 4
+ // CHECK: [[row_load:%.*]] = load i32, ptr %row, align 4
+ // CHECK-NEXT: [[col_load:%.*]] = load i32, ptr %col, align 4
+ // CHECK-NEXT: [[row_offset:%.*]] = mul i32 [[row_load]], 3
+ // CHECK-NEXT: [[row_major_index:%.*]] = add i32 [[row_offset]], [[col_load]]
+ // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x half>, ptr %M.addr, align 2
+ // CHECK-NEXT: %matrixext = extractelement <6 x half> [[matrix_as_vec]], i32 [[row_major_index]]
+ const uint COLS = 3;
+ uint row = index / COLS;
+ uint col = index % COLS;
+ Out[index] = M[row][col];
+}
+
+half two_index_test(int row, int col, half2x3 M) {
+ // CHECK-LABEL: two_index_test
+ // CHECK: [[row_offset:%.*]] = mul i32 [[row_load:%.*]], 3
+ // CHECK-NEXT: [[row_major_index:%.*]] = add i32 [[row_offset]], [[col_load:%.*]]
+ // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x half>, ptr %M.addr, align 2
+ // CHECK-NEXT: %matrixext = extractelement <6 x half> [[matrix_as_vec]], i32 [[row_major_index]]
+ return M[row][col];
+}
diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h
index 3a04ca87f2b55..f90cd92096103 100644
--- a/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/llvm/include/llvm/IR/MatrixBuilder.h
@@ -241,8 +241,8 @@ class MatrixBuilder {
/// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from
/// a matrix with \p NumRows embedded in a vector.
- Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows,
- Twine const &Name = "") {
+ Value *createColumnMajorIndex(Value *RowIdx, Value *ColumnIdx,
+ unsigned NumRows, Twine const &Name = "") {
unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
ColumnIdx->getType()->getScalarSizeInBits());
Type *IntTy = IntegerType::get(RowIdx->getType()->getContext(), MaxWidth);
@@ -251,6 +251,19 @@ class MatrixBuilder {
Value *NumRowsV = B.getIntN(MaxWidth, NumRows);
return B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx);
}
+
+ /// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from
+ /// a matrix with \p NumRows embedded in a vector.
+ Value *createRowMajorIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumCols,
+ Twine const &Name = "") {
+ unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
+ ColumnIdx->getType()->getScalarSizeInBits());
+ Type *IntTy = IntegerType::get(RowIdx->getType()->getContext(), MaxWidth);
+ RowIdx = B.CreateZExt(RowIdx, IntTy);
+ ColumnIdx = B.CreateZExt(ColumnIdx, IntTy);
+ Value *NumColsV = B.getIntN(MaxWidth, NumCols);
+ return B.CreateAdd(B.CreateMul(RowIdx, NumColsV), ColumnIdx);
+ }
};
} // end namespace llvm
More information about the cfe-commits
mailing list