[clang] [Matrix] Fix cbuffers support for matrix element expr (PR #185471)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 9 10:39:40 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Farzon Lotfi (farzonl)
<details>
<summary>Changes</summary>
fixes #<!-- -->184877
This change was threefold.
1. copy the padded cbuffer from memory to a local alloca
2. switch to using the new `getFlattenedIndex` helpers for index generation
3. convert row-major indices to column-major indices in codegen, depending on LangOptions
---
Patch is 72.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/185471.diff
12 Files Affected:
- (modified) clang/lib/AST/Expr.cpp (+12-12)
- (modified) clang/lib/CodeGen/CGExpr.cpp (+28-3)
- (added) clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl (+37)
- (modified) clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl (+12-12)
- (modified) clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl (+12-12)
- (modified) clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-load.hlsl (+8-8)
- (modified) clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-store.hlsl (+24-24)
- (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl (+12-12)
- (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl (+12-12)
- (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-load.hlsl (+8-8)
- (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-store.hlsl (+24-24)
- (added) clang/test/CodeGenHLSL/resources/MatrixElement_cbuffer.hlsl (+96)
``````````diff
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 9632d88fae4e4..185e887fb05c3 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4478,8 +4478,8 @@ static MatrixAccessorFormat GetHLSLMatrixAccessorFormat(StringRef Comp) {
}
template <typename Fn>
-static bool ForEachMatrixAccessorIndex(StringRef Comp, unsigned Rows,
- unsigned Cols, Fn &&F) {
+static bool ForEachMatrixAccessorIndex(StringRef Comp,
+ const ConstantMatrixType *MT, Fn &&F) {
auto Format = GetHLSLMatrixAccessorFormat(Comp);
for (unsigned I = 0, E = Comp.size(); I < E; I += Format.ChunkLen) {
@@ -4491,8 +4491,13 @@ static bool ForEachMatrixAccessorIndex(StringRef Comp, unsigned Rows,
Col = static_cast<unsigned>(Comp[I + ZeroIndexOffset + 2] - '0') -
OneIndexOffset;
- assert(Row < Rows && Col < Cols && "matrix swizzle index out of bounds");
- const unsigned Index = Row * Cols + Col;
+ assert(Row < MT->getNumRows() && Col < MT->getNumColumns() &&
+ "matrix swizzle index out of bounds");
+ // NOTE: AST layer has no access to LangOptions so we will default to row
+ // major b\c all other AST matrix representations are row major.
+ // However in codegen we need to convert to column major if the flag
+ // requires it.
+ const unsigned Index = MT->getFlattenedIndex(Row, Col, /*IsRowMajor*/ true);
// Callback returns true to continue, false to stop early.
if (!F(Index))
return false;
@@ -4507,13 +4512,10 @@ static bool ForEachMatrixAccessorIndex(StringRef Comp, unsigned Rows,
bool MatrixElementExpr::containsDuplicateElements() const {
StringRef Comp = Accessor->getName();
const auto *MT = getBase()->getType()->castAs<ConstantMatrixType>();
- const unsigned Rows = MT->getNumRows();
- const unsigned Cols = MT->getNumColumns();
- const unsigned Max = Rows * Cols;
- llvm::BitVector Seen(Max, /*t=*/false);
+ llvm::BitVector Seen(MT->getNumElementsFlattened(), /*t=*/false);
bool HasDup = false;
- ForEachMatrixAccessorIndex(Comp, Rows, Cols, [&](unsigned Index) -> bool {
+ ForEachMatrixAccessorIndex(Comp, MT, [&](unsigned Index) -> bool {
if (Seen[Index]) {
HasDup = true;
return false; // exit early
@@ -4562,9 +4564,7 @@ void MatrixElementExpr::getEncodedElementAccess(
SmallVectorImpl<uint32_t> &Elts) const {
StringRef Comp = Accessor->getName();
const auto *MT = getBase()->getType()->castAs<ConstantMatrixType>();
- const unsigned Rows = MT->getNumRows();
- const unsigned Cols = MT->getNumColumns();
- ForEachMatrixAccessorIndex(Comp, Rows, Cols, [&](unsigned Index) -> bool {
+ ForEachMatrixAccessorIndex(Comp, MT, [&](unsigned Index) -> bool {
Elts.push_back(Index);
return true;
});
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index eebb36276e0eb..fe5de9f8df09d 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2329,15 +2329,39 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) {
E->getType().withCVRQualifiers(Base.getQuals().getCVRQualifiers());
// Encode the element access list into a vector of unsigned indices.
+ // getEncodedElementAccess returns row-major linearized indices.
SmallVector<uint32_t, 4> Indices;
E->getEncodedElementAccess(Indices);
+ // getEncodedElementAccess returns row-major linearized indices
+ // If the matrix memory layout is column-major, convert indices
+ // to column-major indices.
+ bool IsColMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
+ LangOptions::MatrixMemoryLayout::MatrixColMajor;
+ if (IsColMajor) {
+ const auto *MT = E->getBase()->getType()->castAs<ConstantMatrixType>();
+ unsigned NumCols = MT->getNumColumns();
+ for (uint32_t &Idx : Indices) {
+ // Decompose row-major index: Row = Idx / NumCols, Col = Idx % NumCols
+ unsigned Row = Idx / NumCols;
+ unsigned Col = Idx % NumCols;
+ // Re-linearize as column-major
+ Idx = MT->getColumnMajorFlattenedIndex(Row, Col);
+ }
+ }
+
if (Base.isSimple()) {
+ RawAddress MatAddr = Base.getAddress();
+ if (getLangOpts().HLSL &&
+ E->getBase()->getType().getAddressSpace() == LangAS::hlsl_constant)
+ MatAddr = CGM.getHLSLRuntime().createBufferMatrixTempAddress(
+ Base, E->getExprLoc(), *this);
+
llvm::Constant *CV =
llvm::ConstantDataVector::get(getLLVMContext(), Indices);
- return LValue::MakeExtVectorElt(
- MaybeConvertMatrixAddress(Base.getAddress(), *this), CV, ResultType,
- Base.getBaseInfo(), TBAAAccessInfo());
+ return LValue::MakeExtVectorElt(MaybeConvertMatrixAddress(MatAddr, *this),
+ CV, ResultType, Base.getBaseInfo(),
+ TBAAAccessInfo());
}
assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!");
@@ -2347,6 +2371,7 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) {
for (unsigned Index : Indices)
CElts.push_back(BaseElts->getAggregateElement(Index));
llvm::Constant *CV = llvm::ConstantVector::get(CElts);
+
return LValue::MakeExtVectorElt(
MaybeConvertMatrixAddress(Base.getExtVectorAddress(), *this), CV,
ResultType, Base.getBaseInfo(), TBAAAccessInfo());
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl
new file mode 100644
index 0000000000000..fe9e69c6a6898
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes \
+// RUN: -emit-llvm -finclude-default-header -fmatrix-memory-layout=column-major \
+// RUN: -o - %s | FileCheck %s --check-prefixes=CHECK,COL
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes \
+// RUN: -emit-llvm -finclude-default-header -fmatrix-memory-layout=row-major \
+// RUN: -o - %s | FileCheck %s --check-prefixes=CHECK,ROW
+
+// For a float3x2 matrix (3 rows, 2 columns):
+// Column-major flat vector: [_11, _21, _31, _12, _22, _32]
+// idx: 0 1 2 3 4 5
+// Row-major flat vector: [_11, _12, _21, _22, _31, _32]
+// idx: 0 1 2 3 4 5
+
+
+// CHECK-LABEL: define {{.*}} @_Z16getScalarElementu11matrix_typeILm3ELm2EfE
+// CHECK: load <6 x float>, ptr
+// COL-NEXT: extractelement <6 x float> {{.*}}, i32 4
+// ROW-NEXT: extractelement <6 x float> {{.*}}, i32 3
+export float getScalarElement(float3x2 M) {
+ return M._22;
+}
+
+// CHECK-LABEL: define {{.*}} @_Z18getSwizzleElementsu11matrix_typeILm3ELm2EfE
+// CHECK: load <6 x float>, ptr
+// COL-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4>
+// ROW-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+export float4 getSwizzleElements(float3x2 M) {
+ return M._11_12_21_22;
+}
+
+// CHECK-LABEL: define {{.*}} @_Z22getZeroBasedSwizzleEltu11matrix_typeILm3ELm2EfE
+// CHECK: load <6 x float>, ptr
+// COL-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <2 x i32> <i32 1, i32 3>
+// ROW-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <2 x i32> <i32 2, i32 1>
+export float2 getZeroBasedSwizzleElt(float3x2 M) {
+ return M._m10_m01;
+}
\ No newline at end of file
diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
index def8aa5440568..6d8a3ce6ecbb6 100644
--- a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl
@@ -23,7 +23,7 @@ int Return11(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 1
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 4
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return12(int4x4 A) {
@@ -36,7 +36,7 @@ int Return12(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 2
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 8
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return13(int4x4 A) {
@@ -49,7 +49,7 @@ int Return13(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 12
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return14(int4x4 A) {
@@ -62,7 +62,7 @@ int Return14(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 4
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 1
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return21(int4x4 A) {
@@ -88,7 +88,7 @@ int Return22(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 6
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 9
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return23(int4x4 A) {
@@ -101,7 +101,7 @@ int Return23(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 7
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 13
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return24(int4x4 A) {
@@ -114,7 +114,7 @@ int Return24(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 8
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 2
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return31(int4x4 A) {
@@ -127,7 +127,7 @@ int Return31(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 9
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 6
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return32(int4x4 A) {
@@ -153,7 +153,7 @@ int Return33(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 11
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 14
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return34(int4x4 A) {
@@ -166,7 +166,7 @@ int Return34(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 12
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 3
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return41(int4x4 A) {
@@ -179,7 +179,7 @@ int Return41(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 13
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 7
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return42(int4x4 A) {
@@ -192,7 +192,7 @@ int Return42(int4x4 A) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
// CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 14
+// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 11
// CHECK-NEXT: ret i32 [[TMP1]]
//
int Return43(int4x4 A) {
diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
index fb3a46170ebe0..9ea292ecea007 100644
--- a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl
@@ -29,7 +29,7 @@ void StoreScalarAtMat11(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 4
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -46,7 +46,7 @@ void StoreScalarAtMat12(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 8
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -63,7 +63,7 @@ void StoreScalarAtMat13(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 3
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 12
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -80,7 +80,7 @@ void StoreScalarAtMat14(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 4
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 1
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -114,7 +114,7 @@ void StoreScalarAtMat22(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 9
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -131,7 +131,7 @@ void StoreScalarAtMat23(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 7
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 13
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -148,7 +148,7 @@ void StoreScalarAtMat24(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 8
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -165,7 +165,7 @@ void StoreScalarAtMat31(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 9
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -199,7 +199,7 @@ void StoreScalarAtMat33(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 11
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 14
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -216,7 +216,7 @@ void StoreScalarAtMat34(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 12
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 3
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -233,7 +233,7 @@ void StoreScalarAtMat41(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]]
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 13
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 7
// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4
// CHECK-NEXT: ret void
//
@@ -250,7 +250,7 @@ void StoreScalarAtMat42(out int4x4 A, int I) {
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/185471
More information about the cfe-commits
mailing list