[llvm-branch-commits] [clang] [HLSL][Matrix] EmitFromMemory when emitting load of vector and matrix element LValues (PR #178315)
Deric C. via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Feb 17 11:37:33 PST 2026
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/178315
>From 3c3844dbe7d6b79de4f7a86eca7d1ad9480ca21c Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 27 Jan 2026 14:51:39 -0800
Subject: [PATCH 1/2] EmitFromMemory when emitting load of vector and matrix
element LValues
---
clang/lib/CodeGen/CGExpr.cpp | 19 ++++---
.../BasicFeatures/VectorElementwiseCast.hlsl | 53 +++++++++++++++++++
2 files changed, 65 insertions(+), 7 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 490377c04b034..7f817000acb68 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2445,8 +2445,9 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
if (LV.isVectorElt()) {
llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
LV.isVolatileQualified());
- return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(),
- "vecext"));
+ llvm::Value *Elt =
+ Builder.CreateExtractElement(Load, LV.getVectorIdx(), "vecext");
+ return RValue::get(EmitFromMemory(Elt, LV.getType()));
}
// If this is a reference to a subset of the elements of a vector, either
@@ -2461,14 +2462,18 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
if (LV.isMatrixElt()) {
llvm::Value *Idx = LV.getMatrixIdx();
- if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
- const auto *const MatTy = LV.getType()->castAs<ConstantMatrixType>();
- llvm::MatrixBuilder MB(Builder);
- MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+ QualType EltTy = LV.getType();
+ if (const auto *MatTy = EltTy->getAs<ConstantMatrixType>()) {
+ EltTy = MatTy->getElementType();
+ if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+ llvm::MatrixBuilder MB(Builder);
+ MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+ }
}
llvm::LoadInst *Load =
Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
- return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext"));
+ llvm::Value *Elt = Builder.CreateExtractElement(Load, Idx, "matrixext");
+ return RValue::get(EmitFromMemory(Elt, EltTy));
}
if (LV.isMatrixRow()) {
QualType MatTy = LV.getType();
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index c11c8498ada45..881e6b5dd525a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -180,3 +180,56 @@ export void call8(int3x1 M) {
int3 V = (int3)M;
}
+// vector flat cast from matrix of same size (bool)
+// CHECK-LABEL: call9
+// CHECK: [[M_ADDR:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32>
+// CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <2 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[MATRIXEXT]] to i1
+// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
+// CHECK-NEXT: [[LOADEDV2:%.*]] = trunc i32 [[MATRIXEXT1]] to i1
+// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i1> [[TMP4]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32>
+// CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[V]], align 8
+// CHECK-NEXT: ret void
+export void call9(bool1x2 M) {
+ bool2 V = (bool2)M;
+}
+
+struct BoolVecStruct {
+ bool2 V;
+};
+
+// vector flat cast from struct containing bool vector
+// CHECK-LABEL: call10
+// CHECK: [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false)
+// CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr inbounds %struct.BoolVecStruct, ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[VECEXT]] to i1
+// CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> [[TMP0]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+// CHECK-NEXT: [[LOADEDV2:%.*]] = trunc i32 [[VECEXT1]] to i1
+// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
+// CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[V]], align 8
+// CHECK-NEXT: ret void
+export void call10(BoolVecStruct s) {
+ bool2 V = (bool2)s;
+}
>From 341252791338a90604c41543834add90cd9d680a Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 17 Feb 2026 11:37:14 -0800
Subject: [PATCH 2/2] Update matrix allocas in test for array of vectors
representation
---
.../CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 83776b816211d..e232223b185c2 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -184,9 +184,11 @@ export void call8(int3x1 M) {
// vector flat cast from matrix of same size (bool)
// CHECK-LABEL: call9
-// CHECK: [[M_ADDR:%.*]] = alloca [2 x i32], align 4
+// COL-CHECK: [[M_ADDR:%.*]] = alloca [2 x <1 x i32>], align 4
+// ROW-CHECK: [[M_ADDR:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: [[V:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4
+// COL-CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x <1 x i32>], align 4
+// ROW-CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [1 x <2 x i32>], align 4
// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
// CHECK-NEXT: [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32>
// CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
More information about the llvm-branch-commits
mailing list