[clang] [HLSL][Matrix] Make matrix truncation respect default matrix memory layout (PR #184280)
Deric C. via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 2 17:53:47 PST 2026
https://github.com/Icohedron created https://github.com/llvm/llvm-project/pull/184280
Fixes #183127
This PR makes the matrix truncation cast implementation use the new matrix flattened index helper functions introduced by #182904 so that it reads elements from the source matrix using the default matrix memory layout instead of always assuming column-major order.
This PR also fixes a bug where matrix truncation always produced a truncated matrix in row-major order, regardless of the default matrix memory layout.
Assisted-by: claude-opus-4.6
From 9e181cf78fe2c8a9589af6ec735fd6deaa47f7aa Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Mon, 2 Mar 2026 10:06:14 -0800
Subject: [PATCH 1/3] Add missing const to getFlattenedIndex function
---
clang/include/clang/AST/TypeBase.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index dc4442bfeb795..ec7845c3b3adb 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -4435,7 +4435,7 @@ class ConstantMatrixType final : public MatrixType {
/// row-major order flattened index. Otherwise, returns the column-major order
/// flattened index.
unsigned getFlattenedIndex(unsigned Row, unsigned Column,
- bool IsRowMajor = false) {
+ bool IsRowMajor = false) const {
return IsRowMajor ? getRowMajorFlattenedIndex(Row, Column)
: getColumnMajorFlattenedIndex(Row, Column);
}
From 4d0fcfd54f9acc1d27b965a29ee612ce77e290be Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Mon, 2 Mar 2026 17:10:44 -0800
Subject: [PATCH 2/3] Add matrix index unflatten helper functions
Assisted-by: claude-opus-4.6
---
clang/include/clang/AST/TypeBase.h | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index ec7845c3b3adb..d913c93fdce92 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -4457,6 +4457,30 @@ class ConstantMatrixType final : public MatrixType {
return Column * NumRows + Row;
}
+ /// Given a row-major flattened index \p Index, return the corresponding
+ /// {row, column} position.
+ std::pair<unsigned, unsigned>
+ getRowMajorRowAndColumn(unsigned Index) const {
+ return {Index / NumColumns, Index % NumColumns};
+ }
+
+ /// Given a column-major flattened index \p Index, return the corresponding
+ /// {row, column} position.
+ std::pair<unsigned, unsigned>
+ getColumnMajorRowAndColumn(unsigned Index) const {
+ return {Index % NumRows, Index / NumRows};
+ }
+
+ /// Given a flattened index \p Index, return the corresponding {row, column}
+ /// position. If \p IsRowMajor is true, interprets \p Index as a row-major
+ /// flattened index. Otherwise, interprets it as a column-major flattened
+ /// index.
+ std::pair<unsigned, unsigned> getRowAndColumn(unsigned Index,
+ bool IsRowMajor = false) const {
+ return IsRowMajor ? getRowMajorRowAndColumn(Index)
+ : getColumnMajorRowAndColumn(Index);
+ }
+
void Profile(llvm::FoldingSetNodeID &ID) {
Profile(ID, getElementType(), getNumRows(), getNumColumns(),
getTypeClass());
From 3e6edfa5394065930d2b08a9dcbae5df50267d97 Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Mon, 2 Mar 2026 14:41:38 -0800
Subject: [PATCH 3/3] Make matrix truncation respect default matrix memory layout
Assisted-by: claude-opus-4.6
---
clang/lib/CodeGen/CGExprScalar.cpp | 17 ++++-----
.../MatrixExplicitTruncation.hlsl | 36 ++++++++++++-------
.../MatrixImplicitTruncation.hlsl | 33 +++++++++++------
3 files changed, 55 insertions(+), 31 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 06eadb6c07507..35e3796a42876 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3118,14 +3118,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
SmallVector<int> Mask;
unsigned NumCols = MatTy->getNumColumns();
unsigned NumRows = MatTy->getNumRows();
- unsigned ColOffset = NumCols;
- if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
- ColOffset = SrcMatTy->getNumColumns();
- for (unsigned R = 0; R < NumRows; R++) {
- for (unsigned C = 0; C < NumCols; C++) {
- unsigned I = R * ColOffset + C;
- Mask.push_back(I);
- }
+ auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>();
+ assert(SrcMatTy && "Source type must be a matrix type.");
+ assert(NumRows <= SrcMatTy->getNumRows());
+ assert(NumCols <= SrcMatTy->getNumColumns());
+ bool IsRowMajor = CGF.getLangOpts().getDefaultMatrixMemoryLayout() ==
+ LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+ for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
+ auto [Row, Col] = MatTy->getRowAndColumn(I, IsRowMajor);
+ Mask.push_back(SrcMatTy->getFlattenedIndex(Row, Col, IsRowMajor));
}
return Builder.CreateShuffleVector(Mat, Mask, "trunc");
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
index fb32478f2cac9..587ccfe88a627 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
@@ -1,14 +1,17 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
// CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE(
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
+// ROW-CHECK-NEXT: [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// COL-CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I34]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -22,10 +25,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
+// ROW-CHECK-NEXT: [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// COL-CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -56,10 +61,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
+// ROW-CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// COL-CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -73,10 +80,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
+// ROW-CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// COL-CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -107,10 +116,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
+// ROW-CHECK-NEXT: [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// COL-CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
// CHECK-NEXT: ret <2 x i32> [[TMP1]]
@@ -144,7 +155,8 @@
// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
// CHECK-NEXT: [[CAST_MTRUNC:%.*]] = extractelement <12 x i32> [[TRUNC]], i32 0
// CHECK-NEXT: store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I1]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
index d8738c8948f0f..1a9a953ef8367 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
@@ -1,14 +1,17 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
// CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE(
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
+// ROW-CHECK-NEXT: [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// COL-CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I34]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -22,10 +25,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
+// ROW-CHECK-NEXT: [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// COL-CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -56,10 +61,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
+// ROW-CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// COL-CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -73,10 +80,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
+// ROW-CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// COL-CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -107,10 +116,12 @@
// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
+// ROW-CHECK-NEXT: [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// COL-CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
+// ROW-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
+// COL-CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
// CHECK-NEXT: ret <2 x i32> [[TMP1]]
More information about the cfe-commits mailing list