[llvm-branch-commits] [clang] [HLSL][Matrix] EmitFromMemory when emitting load of vector and matrix element LValues (PR #178315)

Tue Feb 17 11:37:33 PST 2026

https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/178315

>From 3c3844dbe7d6b79de4f7a86eca7d1ad9480ca21c Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 27 Jan 2026 14:51:39 -0800
Subject: [PATCH 1/2] EmitFromMemory when emitting load vector and matrix
 element LValue

---
 clang/lib/CodeGen/CGExpr.cpp                  | 19 ++++---
 .../BasicFeatures/VectorElementwiseCast.hlsl  | 53 +++++++++++++++++++
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 490377c04b034..7f817000acb68 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2445,8 +2445,9 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
   if (LV.isVectorElt()) {
     llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(),
                                               LV.isVolatileQualified());
-    return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(),
-                                                    "vecext"));
+    llvm::Value *Elt =
+        Builder.CreateExtractElement(Load, LV.getVectorIdx(), "vecext");
+    return RValue::get(EmitFromMemory(Elt, LV.getType()));
   }
 
   // If this is a reference to a subset of the elements of a vector, either
@@ -2461,14 +2462,18 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
 
   if (LV.isMatrixElt()) {
     llvm::Value *Idx = LV.getMatrixIdx();
-    if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
-      const auto *const MatTy = LV.getType()->castAs<ConstantMatrixType>();
-      llvm::MatrixBuilder MB(Builder);
-      MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+    QualType EltTy = LV.getType();
+    if (const auto *MatTy = EltTy->getAs<ConstantMatrixType>()) {
+      EltTy = MatTy->getElementType();
+      if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+        llvm::MatrixBuilder MB(Builder);
+        MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+      }
     }
     llvm::LoadInst *Load =
         Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
-    return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext"));
+    llvm::Value *Elt = Builder.CreateExtractElement(Load, Idx, "matrixext");
+    return RValue::get(EmitFromMemory(Elt, EltTy));
   }
   if (LV.isMatrixRow()) {
     QualType MatTy = LV.getType();
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index c11c8498ada45..881e6b5dd525a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -180,3 +180,56 @@ export void call8(int3x1 M) {
     int3 V = (int3)M;
 }
 
+// vector flat cast from matrix of same size (bool)
+// CHECK-LABEL: call9
+// CHECK:    [[M_ADDR:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store <2 x i32> [[TMP1]], ptr [[HLSL_EWCAST_SRC]], align 4
+// CHECK-NEXT:    [[MATRIX_GEP:%.*]] = getelementptr inbounds <2 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT:    [[MATRIXEXT:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[MATRIXEXT]] to i1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4
+// CHECK-NEXT:    [[MATRIXEXT1:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = trunc i32 [[MATRIXEXT1]] to i1
+// CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i1> [[TMP4]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT:    [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP7]], ptr [[V]], align 8
+// CHECK-NEXT:    ret void
+export void call9(bool1x2 M) {
+    bool2 V = (bool2)M;
+}
+
+struct BoolVecStruct {
+    bool2 V;
+};
+
+// vector flat cast from struct containing bool vector
+// CHECK-LABEL: call10
+// CHECK:    [[V:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false)
+// CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr inbounds %struct.BoolVecStruct, ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[VECEXT]] to i1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i1> [[TMP0]], i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8
+// CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = trunc i32 [[VECEXT1]] to i1
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV2]], i64 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
+// CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[V]], align 8
+// CHECK-NEXT:    ret void
+export void call10(BoolVecStruct s) {
+    bool2 V = (bool2)s;
+}

>From 341252791338a90604c41543834add90cd9d680a Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Tue, 17 Feb 2026 11:37:14 -0800
Subject: [PATCH 2/2] Update matrix allocas in test for array of vectors
 representation

---
 .../CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 83776b816211d..e232223b185c2 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -184,9 +184,11 @@ export void call8(int3x1 M) {
 
 // vector flat cast from matrix of same size (bool)
 // CHECK-LABEL: call9
-// CHECK:    [[M_ADDR:%.*]] = alloca [2 x i32], align 4
+// COL-CHECK:    [[M_ADDR:%.*]] = alloca [2 x <1 x i32>], align 4
+// ROW-CHECK:    [[M_ADDR:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    [[V:%.*]] = alloca <2 x i32>, align 8
-// CHECK-NEXT:    [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4
+// COL-CHECK-NEXT:    [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x <1 x i32>], align 4
+// ROW-CHECK-NEXT:    [[HLSL_EWCAST_SRC:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32>
 // CHECK-NEXT:    store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4