[clang] [HLSL][Matrix] Add support for Matrix element and trunc Casts (PR #168915)
Farzon Lotfi via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 2 10:13:39 PST 2025
https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/168915
>From 49bc3915548e9755c5448d9ecff9b3128ae1c3ba Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 20 Nov 2025 12:16:26 -0500
Subject: [PATCH 01/12] [HLSL][Matrix] Add support for Matrix element and trunc
Casts
fixes #168737
fixes #168755
This change adds support for Matrix truncations
via the ICK_HLSL_Matrix_Truncation enum. That ends up being
most of the files changed.
It also allows Matrix as an HLSL Elementwise cast as long as the
cast does not perform a shape transformation, i.e., 3x2 to 2x3.
Tests for the new elementwise and truncation behavior were added.
As well as sema tests to make sure we error on the shape transformation
cast.
I am punting right now on the ConstExpr Matrix support.
That will need to be addressed later. Will file a separate issue for
that if reviewers agree it can wait.
---
clang/include/clang/AST/OperationKinds.def | 3 +
clang/include/clang/Sema/Overload.h | 3 +
clang/lib/AST/Expr.cpp | 1 +
clang/lib/AST/ExprConstant.cpp | 13 ++
clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 2 +
clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp | 1 +
clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 1 +
clang/lib/CodeGen/CGExpr.cpp | 1 +
clang/lib/CodeGen/CGExprAgg.cpp | 3 +-
clang/lib/CodeGen/CGExprComplex.cpp | 1 +
clang/lib/CodeGen/CGExprConstant.cpp | 1 +
clang/lib/CodeGen/CGExprScalar.cpp | 35 +++-
clang/lib/Sema/SemaExprCXX.cpp | 22 ++-
clang/lib/Sema/SemaHLSL.cpp | 5 +-
clang/lib/Sema/SemaOverload.cpp | 75 ++++++-
clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 1 +
.../BasicFeatures/MatrixElementTypeCast.hlsl | 186 ++++++++++++++++++
.../BasicFeatures/MatrixTruncation.hlsl | 156 +++++++++++++++
.../Types/BuiltinMatrix/MatrixCastErrors.hlsl | 21 ++
19 files changed, 516 insertions(+), 15 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl
create mode 100644 clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl
diff --git a/clang/include/clang/AST/OperationKinds.def b/clang/include/clang/AST/OperationKinds.def
index c2dca895e8411..8a13ad988403b 100644
--- a/clang/include/clang/AST/OperationKinds.def
+++ b/clang/include/clang/AST/OperationKinds.def
@@ -364,6 +364,9 @@ CAST_OPERATION(IntToOCLSampler)
// Truncate a vector type by dropping elements from the end (HLSL only).
CAST_OPERATION(HLSLVectorTruncation)
+// Truncate a matrix type by dropping elements from the end (HLSL only).
+CAST_OPERATION(HLSLMatrixTruncation)
+
// Non-decaying array RValue cast (HLSL only).
CAST_OPERATION(HLSLArrayRValue)
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index 59bbd0fbd9e95..1ad52cb9da517 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -198,6 +198,9 @@ class Sema;
/// HLSL vector truncation.
ICK_HLSL_Vector_Truncation,
+ /// HLSL Matrix truncation.
+ ICK_HLSL_Matrix_Truncation,
+
/// HLSL non-decaying array rvalue cast.
ICK_HLSL_Array_RValue,
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 1f405920ce6b5..ca7f3e16a9276 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1934,6 +1934,7 @@ bool CastExpr::CastConsistency() const {
case CK_FixedPointToBoolean:
case CK_HLSLArrayRValue:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
CheckNoBasePath:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e5af4cb049ba9..baadaab0973a2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11770,6 +11770,10 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) {
Elements.push_back(Val.getVectorElt(I));
return Success(Elements, E);
}
+ case CK_HLSLMatrixTruncation: {
+ // TODO: support Expr Constant for Matrix Truncation
+ return Error(E);
+ }
case CK_HLSLAggregateSplatCast: {
APValue Val;
QualType ValTy;
@@ -18430,6 +18434,10 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
return Error(E);
return Success(Val.getVectorElt(0), E);
}
+ case CK_HLSLMatrixTruncation: {
+ // TODO: support Expr Constant for Matrix Truncation
+ return Error(E);
+ }
case CK_HLSLElementwiseCast: {
SmallVector<APValue> SrcVals;
SmallVector<QualType> SrcTypes;
@@ -19023,6 +19031,10 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) {
return Error(E);
return Success(Val.getVectorElt(0), E);
}
+ case CK_HLSLMatrixTruncation: {
+ // TODO: support Expr Constant for Matrix Truncation
+ return Error(E);
+ }
case CK_HLSLElementwiseCast: {
SmallVector<APValue> SrcVals;
SmallVector<QualType> SrcTypes;
@@ -19180,6 +19192,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) {
case CK_IntegralToFixedPoint:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
llvm_unreachable("invalid cast kind for complex value");
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 4065124f8f568..d48fb45aed78c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -188,6 +188,7 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr,
case CK_HLSLArrayRValue:
case CK_HLSLElementwiseCast:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_IntToOCLSampler:
case CK_IntegralCast:
case CK_IntegralComplexCast:
@@ -1323,6 +1324,7 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) {
case CK_IntegralToFixedPoint:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLArrayRValue:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
index 9ed920085c8c6..fe06f8cc2c430 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
@@ -534,6 +534,7 @@ mlir::Value ComplexExprEmitter::emitCast(CastKind ck, Expr *op,
case CK_IntegralToFixedPoint:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLArrayRValue:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index 66f8ef9b05913..329fd08bc8914 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -1012,6 +1012,7 @@ class ConstExprEmitter
case CK_MatrixCast:
case CK_HLSLArrayRValue:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
return {};
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c8f669b69d991..a54d400c66968 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5772,6 +5772,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
case CK_IntegralToFixedPoint:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLArrayRValue:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 67b5f919d1b2a..7cc4d6c8f06f6 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -1036,7 +1036,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
case CK_ZeroToOCLOpaqueType:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
-
+ case CK_HLSLMatrixTruncation:
case CK_IntToOCLSampler:
case CK_FloatingToFixedPoint:
case CK_FixedPointToFloating:
@@ -1550,6 +1550,7 @@ static bool castPreservesZero(const CastExpr *CE) {
case CK_NonAtomicToAtomic:
case CK_AtomicToNonAtomic:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
return true;
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index bca7c30557f03..e5815ef1130dc 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -621,6 +621,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op,
case CK_IntegralToFixedPoint:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLArrayRValue:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 6407afc3d9447..0eec4dba4824a 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1333,6 +1333,7 @@ class ConstExprEmitter
case CK_ZeroToOCLOpaqueType:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLArrayRValue:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 714192db1b15c..a9e2ebdffa59a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2422,9 +2422,27 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
}
return V;
}
+ if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
+ assert(LoadList.size() >= MatTy->getNumElementsFlattened() &&
+ "Flattened type on RHS must have the same number or more elements "
+ "than vector on LHS.");
+ llvm::Value *V =
+ CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
+ // write to V.
+ for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
+ assert(RVal.isScalar() &&
+ "All flattened source values should be scalars.");
+ llvm::Value *Cast =
+ CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
+ MatTy->getElementType(), Loc);
+ V = CGF.Builder.CreateInsertElement(V, Cast, I);
+ }
+ return V;
+ }
// if its a builtin just do an extract element or load.
assert(DestTy->isBuiltinType() &&
- "Destination type must be a vector or builtin type.");
+ "Destination type must be a vector, matrix, or builtin type.");
RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc);
assert(RVal.isScalar() && "All flattened source values should be scalars.");
return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(),
@@ -2954,6 +2972,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc");
}
+ case CK_HLSLMatrixTruncation: {
+ assert((DestTy->isMatrixType() || DestTy->isBuiltinType()) &&
+ "Destination type must be a matrix or builtin type.");
+ Value *Mat = Visit(E);
+ if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
+ SmallVector<int> Mask;
+ unsigned NumElts = MatTy->getNumElementsFlattened();
+ for (unsigned I = 0; I != NumElts; ++I)
+ Mask.push_back(I);
+
+ return Builder.CreateShuffleVector(Mat, Mask, "trunc");
+ }
+ llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
+ return Builder.CreateExtractElement(Mat, Zero, "cast.mtrunc");
+ }
case CK_HLSLElementwiseCast: {
RValue RV = CGF.EmitAnyExpr(E);
SourceLocation Loc = CE->getExprLoc();
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index d6f70e728be29..69719ebd1fc8c 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -5196,6 +5196,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
case ICK_Incompatible_Pointer_Conversion:
case ICK_HLSL_Array_RValue:
case ICK_HLSL_Vector_Truncation:
+ case ICK_HLSL_Matrix_Truncation:
case ICK_HLSL_Vector_Splat:
llvm_unreachable("Improper second standard conversion");
}
@@ -5203,12 +5204,10 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
if (SCS.Dimension != ICK_Identity) {
// If SCS.Element is not ICK_Identity the To and From types must be HLSL
// vectors or matrices.
-
- // TODO: Support HLSL matrices.
- assert((!From->getType()->isMatrixType() && !ToType->isMatrixType()) &&
- "Dimension conversion for matrix types is not implemented yet.");
- assert((ToType->isVectorType() || ToType->isBuiltinType()) &&
- "Dimension conversion output must be vector or scalar type.");
+ assert(
+ (ToType->isVectorType() || ToType->isConstantMatrixType() ||
+ ToType->isBuiltinType()) &&
+ "Dimension conversion output must be vector, matrix, or scalar type.");
switch (SCS.Dimension) {
case ICK_HLSL_Vector_Splat: {
// Vector splat from any arithmetic type to a vector.
@@ -5234,6 +5233,17 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
break;
}
+ case ICK_HLSL_Matrix_Truncation: {
+ auto *FromMat = From->getType()->castAs<ConstantMatrixType>();
+ QualType TruncTy = FromMat->getElementType();
+ if (auto *ToMat = ToType->getAs<ConstantMatrixType>())
+ TruncTy = Context.getConstantMatrixType(TruncTy, ToMat->getNumRows(),
+ ToMat->getNumColumns());
+ From = ImpCastExprToType(From, TruncTy, CK_HLSLMatrixTruncation,
+ From->getValueKind())
+ .get();
+ break;
+ }
case ICK_Identity:
default:
llvm_unreachable("Improper element standard conversion");
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index ecab3946b58c7..921fed93c4268 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3721,7 +3721,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, QualType DestTy) {
}
// Can we perform an HLSL Elementwise cast?
-// TODO: update this code when matrices are added; see issue #88060
bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
// Don't handle casts where LHS and RHS are any combination of scalar/vector
@@ -3734,6 +3733,10 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
(DestTy->isScalarType() || DestTy->isVectorType()))
return false;
+ if (SrcTy->isConstantMatrixType() &&
+ (DestTy->isScalarType() || DestTy->isConstantMatrixType()))
+ return false;
+
llvm::SmallVector<QualType> DestTypes;
BuildFlattenedTypeList(DestTy, DestTypes);
llvm::SmallVector<QualType> SrcTypes;
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index c12f92dfdab66..b5f92eab9b2f1 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -162,6 +162,7 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) {
ICR_C_Conversion_Extension,
ICR_Conversion,
ICR_HLSL_Dimension_Reduction,
+ ICR_HLSL_Dimension_Reduction,
ICR_Conversion,
ICR_HLSL_Scalar_Widening,
};
@@ -224,6 +225,7 @@ static const char *GetImplicitConversionName(ImplicitConversionKind Kind) {
"Incompatible pointer conversion",
"Fixed point conversion",
"HLSL vector truncation",
+ "HLSL matrix truncation",
"Non-decaying array conversion",
"HLSL vector splat",
};
@@ -2060,9 +2062,10 @@ static bool IsFloatingPointConversion(Sema &S, QualType FromType,
return true;
}
-static bool IsVectorElementConversion(Sema &S, QualType FromType,
- QualType ToType,
- ImplicitConversionKind &ICK, Expr *From) {
+static bool IsVectorOrMatrixElementConversion(Sema &S, QualType FromType,
+ QualType ToType,
+ ImplicitConversionKind &ICK,
+ Expr *From) {
if (S.Context.hasSameUnqualifiedType(FromType, ToType))
return true;
@@ -2102,6 +2105,59 @@ static bool IsVectorElementConversion(Sema &S, QualType FromType,
return false;
}
+/// Determine whether the conversion from FromType to ToType is a valid
+/// matrix conversion.
+///
+/// \param ICK Will be set to the matrix conversion kind, if this is a matrix
+/// conversion.
+static bool IsMatrixConversion(Sema &S, QualType FromType, QualType ToType,
+ ImplicitConversionKind &ICK,
+ ImplicitConversionKind &ElConv, Expr *From,
+ bool InOverloadResolution, bool CStyle) {
+ // The non HLSL Matrix conversion rules are not clear.
+ if (!S.getLangOpts().HLSL)
+ return false;
+
+ auto *ToMatrixType = ToType->getAs<ConstantMatrixType>();
+ auto *FromMatrixType = FromType->getAs<ConstantMatrixType>();
+
+ // If both arguments are matrices, handle possible matrix truncation and
+ // element conversion.
+ if (ToMatrixType && FromMatrixType) {
+ unsigned FromCols = FromMatrixType->getNumColumns();
+ unsigned ToCols = ToMatrixType->getNumColumns();
+ if (FromCols < ToCols)
+ return false;
+
+ unsigned FromRows = FromMatrixType->getNumRows();
+ unsigned ToRows = ToMatrixType->getNumRows();
+ if (FromRows < ToRows)
+ return false;
+
+ unsigned FromElts = FromMatrixType->getNumElementsFlattened();
+ unsigned ToElts = ToMatrixType->getNumElementsFlattened();
+ if (FromElts == ToElts)
+ ElConv = ICK_Identity;
+ else
+ ElConv = ICK_HLSL_Matrix_Truncation;
+
+ QualType FromElTy = FromMatrixType->getElementType();
+ QualType ToElTy = ToMatrixType->getElementType();
+ if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
+ return true;
+ return IsVectorOrMatrixElementConversion(S, FromElTy, ToElTy, ICK, From);
+ }
+ if (FromMatrixType && !ToMatrixType) {
+ ElConv = ICK_HLSL_Matrix_Truncation;
+ QualType FromElTy = FromMatrixType->getElementType();
+ if (S.Context.hasSameUnqualifiedType(FromElTy, ToType))
+ return true;
+ return IsVectorOrMatrixElementConversion(S, FromElTy, ToType, ICK, From);
+ }
+
+ return false;
+}
+
/// Determine whether the conversion from FromType to ToType is a valid
/// vector conversion.
///
@@ -2141,14 +2197,14 @@ static bool IsVectorConversion(Sema &S, QualType FromType, QualType ToType,
QualType ToElTy = ToExtType->getElementType();
if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
return true;
- return IsVectorElementConversion(S, FromElTy, ToElTy, ICK, From);
+ return IsVectorOrMatrixElementConversion(S, FromElTy, ToElTy, ICK, From);
}
if (FromExtType && !ToExtType) {
ElConv = ICK_HLSL_Vector_Truncation;
QualType FromElTy = FromExtType->getElementType();
if (S.Context.hasSameUnqualifiedType(FromElTy, ToType))
return true;
- return IsVectorElementConversion(S, FromElTy, ToType, ICK, From);
+ return IsVectorOrMatrixElementConversion(S, FromElTy, ToType, ICK, From);
}
// Fallthrough for the case where ToType is a vector and FromType is not.
}
@@ -2175,7 +2231,8 @@ static bool IsVectorConversion(Sema &S, QualType FromType, QualType ToType,
if (S.getLangOpts().HLSL) {
ElConv = ICK_HLSL_Vector_Splat;
QualType ToElTy = ToExtType->getElementType();
- return IsVectorElementConversion(S, FromType, ToElTy, ICK, From);
+ return IsVectorOrMatrixElementConversion(S, FromType, ToElTy, ICK,
+ From);
}
ICK = ICK_Vector_Splat;
return true;
@@ -2474,6 +2531,11 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
SCS.Second = SecondICK;
SCS.Dimension = DimensionICK;
FromType = ToType.getUnqualifiedType();
+ } else if (IsMatrixConversion(S, FromType, ToType, SecondICK, DimensionICK,
+ From, InOverloadResolution, CStyle)) {
+ SCS.Second = SecondICK;
+ SCS.Dimension = DimensionICK;
+ FromType = ToType.getUnqualifiedType();
} else if (!S.getLangOpts().CPlusPlus &&
S.Context.typesAreCompatible(ToType, FromType)) {
// Compatible conversions (Clang extension for C function overloading)
@@ -6251,6 +6313,7 @@ static bool CheckConvertedConstantConversions(Sema &S,
case ICK_Incompatible_Pointer_Conversion:
case ICK_Fixed_Point_Conversion:
case ICK_HLSL_Vector_Truncation:
+ case ICK_HLSL_Matrix_Truncation:
return false;
case ICK_Lvalue_To_Rvalue:
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index 4ddf8fd5b4b0f..db27c06cd18a3 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -560,6 +560,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex,
case CK_VectorSplat:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLVectorTruncation: {
QualType resultType = CastE->getType();
if (CastE->isGLValue())
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
new file mode 100644
index 0000000000000..081b8013efcbc
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -0,0 +1,186 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fnative-half-type -fnative-int16-type -o - %s | FileCheck %s
+
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast0u11matrix_typeILm3ELm2EfE(
+// CHECK-SAME: <6 x float> noundef nofpclass(nan inf) [[F32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca [6 x float], align 4
+// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <6 x float> [[F32]], ptr [[F32_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <6 x float>, ptr [[F32_ADDR]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x float> [[TMP0]] to <6 x i32>
+// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast0(float3x2 f32) {
+ int3x2 i32 = (int3x2)f32;
+ return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast1u11matrix_typeILm3ELm2EsE(
+// CHECK-SAME: <6 x i16> noundef [[I16_32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I16_32_ADDR:%.*]] = alloca [6 x i16], align 2
+// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <6 x i16> [[I16_32]], ptr [[I16_32_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load <6 x i16>, ptr [[I16_32_ADDR]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = sext <6 x i16> [[TMP0]] to <6 x i32>
+// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast1(int16_t3x2 i16_32) {
+ int3x2 i32 = (int3x2)i16_32;
+ return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast2u11matrix_typeILm3ELm2ElE(
+// CHECK-SAME: <6 x i64> noundef [[I64_32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I64_32_ADDR:%.*]] = alloca [6 x i64], align 8
+// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <6 x i64> [[I64_32]], ptr [[I64_32_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[I64_32_ADDR]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = trunc <6 x i64> [[TMP0]] to <6 x i32>
+// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast2(int64_t3x2 i64_32) {
+ int3x2 i32 = (int3x2)i64_32;
+ return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i16> @_Z22elementwise_type_cast3u11matrix_typeILm2ELm3EDhE(
+// CHECK-SAME: <6 x half> noundef nofpclass(nan inf) [[H23:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[H23_ADDR:%.*]] = alloca [6 x half], align 2
+// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i16], align 2
+// CHECK-NEXT: store <6 x half> [[H23]], ptr [[H23_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load <6 x half>, ptr [[H23_ADDR]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x half> [[TMP0]] to <6 x i16>
+// CHECK-NEXT: store <6 x i16> [[CONV]], ptr [[I23]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i16>, ptr [[I23]], align 2
+// CHECK-NEXT: ret <6 x i16> [[TMP1]]
+//
+int16_t2x3 elementwise_type_cast3(half2x3 h23) {
+ int16_t2x3 i23 = (int16_t2x3)h23;
+ return i23;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast4u11matrix_typeILm3ELm2EdE(
+// CHECK-SAME: <6 x double> noundef nofpclass(nan inf) [[D32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D32_ADDR:%.*]] = alloca [6 x double], align 8
+// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <6 x double> [[D32]], ptr [[D32_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load <6 x double>, ptr [[D32_ADDR]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x double> [[TMP0]] to <6 x i32>
+// CHECK-NEXT: store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast4(double3x2 d32) {
+ int3x2 i32 = (int3x2)d32;
+ return i32;
+}
+
+// CHECK-LABEL: define hidden void @_Z5call2v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca [2 x [1 x i32]], align 4
+// CHECK-NEXT: [[B:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [2 x [1 x i32]], align 4
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @__const._Z5call2v.A, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_TEMP]], ptr align 4 [[A]], i32 8, i1 false)
+// CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[AGG_TEMP]], i32 0, i32 1, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[FLATCAST_TMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP1]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP3]], i64 1
+// CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[B]], align 4
+// CHECK-NEXT: ret void
+//
+void call2() {
+ int A[2][1] = {{1},{2}};
+ int2x1 B = (int2x1)A;
+}
+
+struct S {
+ int X;
+ float Y;
+};
+
+// CHECK-LABEL: define hidden void @_Z5call3v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT: [[A:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z5call3v.s, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[S]], i32 8, i1 false)
+// CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[FLATCAST_TMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP1]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP3]] to i32
+// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[CONV]], i64 1
+// CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
+void call3() {
+ S s = {1, 2.0};
+ int2x1 A = (int2x1)s;
+}
+
+struct BFields {
+ double D;
+ int E: 15;
+ int : 8;
+ float F;
+};
+
+struct Derived : BFields {
+ int G;
+};
+
+// CHECK-LABEL: define hidden void @_Z5call47Derived(
+// CHECK-SAME: ptr noundef byval([[STRUCT_DERIVED:%.*]]) align 1 [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x i32>, align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[E:%.*]] = getelementptr inbounds nuw [[STRUCT_BFIELDS:%.*]], ptr [[GEP]], i32 0, i32 1
+// CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 0, i32 2
+// CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[FLATCAST_TMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[GEP1]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[TMP1]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[CONV]], i64 0
+// CHECK-NEXT: [[BF_LOAD:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT: [[BF_SHL:%.*]] = shl i24 [[BF_LOAD]], 9
+// CHECK-NEXT: [[BF_ASHR:%.*]] = ashr i24 [[BF_SHL]], 9
+// CHECK-NEXT: [[BF_CAST:%.*]] = sext i24 [[BF_ASHR]] to i32
+// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[BF_CAST]], i64 1
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP2]], align 4
+// CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP4]] to i32
+// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[CONV4]], i64 2
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP3]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 3
+// CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[A]], align 4
+// CHECK-NEXT: ret void
+//
+void call4(Derived D) {
+ int2x2 A = (int2x2)D;
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl
new file mode 100644
index 0000000000000..f16d01e1d12ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl
@@ -0,0 +1,156 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I34:%.*]] = alloca [12 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I34]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4
+// CHECK-NEXT: ret <12 x i32> [[TMP1]]
+//
+ int3x4 trunc_cast(int4x4 i44) {
+ int3x4 i34 = (int3x4)i44;
+ return i34;
+}
+
+// CHECK-LABEL: define hidden noundef <12 x i32> @_Z11trunc_cast0u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
+// CHECK-NEXT: ret <12 x i32> [[TMP1]]
+//
+ int4x3 trunc_cast0(int4x4 i44) {
+ int4x3 i43 = (int4x3)i44;
+ return i43;
+}
+
+// CHECK-LABEL: define hidden noundef <9 x i32> @_Z11trunc_cast1u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4
+// CHECK-NEXT: ret <9 x i32> [[TMP1]]
+//
+ int3x3 trunc_cast1(int4x4 i44) {
+ int3x3 i33 = (int3x3)i44;
+ return i33;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast2u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+ int3x2 trunc_cast2(int4x4 i44) {
+ int3x2 i32 = (int3x2)i44;
+ return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast3u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+ int2x3 trunc_cast3(int4x4 i44) {
+ int2x3 i23 = (int2x3)i44;
+ return i23;
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z11trunc_cast4u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+ int2x2 trunc_cast4(int4x4 i44) {
+ int2x2 i22 = (int2x2)i44;
+ return i22;
+}
+
+// CHECK-LABEL: define hidden noundef <2 x i32> @_Z11trunc_cast5u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
+ int2x1 trunc_cast5(int4x4 i44) {
+ int2x1 i21 = (int2x1)i44;
+ return i21;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z11trunc_cast6u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[CAST_MTRUNC:%.*]] = extractelement <16 x i32> [[TMP0]], i32 0
+// CHECK-NEXT: store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I1]], align 4
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+ int trunc_cast6(int4x4 i44) {
+ int i1 = (int)i44;
+ return i1;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z16trunc_multi_castu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT: [[CAST_MTRUNC:%.*]] = extractelement <12 x i32> [[TRUNC]], i32 0
+// CHECK-NEXT: store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I1]], align 4
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+ int trunc_multi_cast(int4x4 i44) {
+ int i1 = (int)(int3x4)i44;
+ return i1;
+}
diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl
new file mode 100644
index 0000000000000..59d432cd3eb00
--- /dev/null
+++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -std=hlsl202x -verify %s
+
+// Note column is too large
+export int3x2 shape_cast_error(float2x3 f23) {
+ int3x2 i32 = (int3x2)f23;
+  // expected-error@-1 {{conversion between matrix types 'int3x2' (aka 'matrix<int, 3, 2>') and 'matrix<float, 2, 3>' of different size is not allowed}}
+ return i32;
+}
+// Note row is too large
+export int2x3 shape_cast_error2(float3x2 f32) {
+ int2x3 i23 = (int2x3)f32;
+  // expected-error@-1 {{conversion between matrix types 'int2x3' (aka 'matrix<int, 2, 3>') and 'matrix<float, 3, 2>' of different size is not allowed}}
+ return i23;
+}
+
+// Note: doing the element type change independently of the shape change should still error
+export int2x3 shape_cast_error3(float3x2 f32) {
+ int2x3 i23 = (int3x2)f32;
+  // expected-error@-1 {{cannot initialize a variable of type 'matrix<[...], 2, 3>' with an rvalue of type 'matrix<[...], 3, 2>'}}
+ return i23;
+}
>From ba2dab99a64b3cfbf00f9286e72a58b735c332a0 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 20 Nov 2025 13:05:55 -0500
Subject: [PATCH 02/12] fix ObjC warning
---
clang/lib/Edit/RewriteObjCFoundationAPI.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
index 40f8348241ecc..e8d4660fd36b2 100644
--- a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
+++ b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
@@ -1085,6 +1085,7 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg,
llvm_unreachable("OpenCL-specific cast in Objective-C?");
case CK_HLSLVectorTruncation:
+ case CK_HLSLMatrixTruncation:
case CK_HLSLElementwiseCast:
case CK_HLSLAggregateSplatCast:
llvm_unreachable("HLSL-specific cast in Objective-C?");
>From 909ad8da1c24ae4ea792af0fc18c1b52e53f66eb Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 20 Nov 2025 18:21:13 -0500
Subject: [PATCH 03/12] address pr comments
---
clang/include/clang/Sema/Overload.h | 2 +-
clang/lib/AST/ExprConstant.cpp | 6 +-
clang/lib/Sema/SemaOverload.cpp | 4 +-
...ion.hlsl => MatrixExplicitTruncation.hlsl} | 0
.../MatrixImplicitTruncation.hlsl | 138 +++++++++
.../MatrixElementOverloadResolution.hlsl | 287 ++++++++++++++++++
6 files changed, 430 insertions(+), 7 deletions(-)
rename clang/test/CodeGenHLSL/BasicFeatures/{MatrixTruncation.hlsl => MatrixExplicitTruncation.hlsl} (100%)
create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
create mode 100644 clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index 1ad52cb9da517..ab45328ee8ab7 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -198,7 +198,7 @@ class Sema;
/// HLSL vector truncation.
ICK_HLSL_Vector_Truncation,
- /// HLSL Matrid truncation.
+ /// HLSL Matrix truncation.
ICK_HLSL_Matrix_Truncation,
/// HLSL non-decaying array rvalue cast.
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index baadaab0973a2..c6366a9df4dd8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11771,7 +11771,7 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) {
return Success(Elements, E);
}
case CK_HLSLMatrixTruncation: {
- // TODO: support Expr Constant for Matrix Truncation
+ // TODO: See #168935. Add matrix truncation support to expr constant.
return Error(E);
}
case CK_HLSLAggregateSplatCast: {
@@ -18435,7 +18435,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
return Success(Val.getVectorElt(0), E);
}
case CK_HLSLMatrixTruncation: {
- // TODO: support Expr Constant for Matrix Truncation
+ // TODO: See #168935. Add matrix truncation support to expr constant.
return Error(E);
}
case CK_HLSLElementwiseCast: {
@@ -19032,7 +19032,7 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr *E) {
return Success(Val.getVectorElt(0), E);
}
case CK_HLSLMatrixTruncation: {
- // TODO: support Expr Constant for Matrix Truncation
+ // TODO: See #168935. Add matrix truncation support to expr constant.
return Error(E);
}
case CK_HLSLElementwiseCast: {
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index b5f92eab9b2f1..1acbda85601af 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -2134,9 +2134,7 @@ static bool IsMatrixConversion(Sema &S, QualType FromType, QualType ToType,
if (FromRows < ToRows)
return false;
- unsigned FromElts = FromMatrixType->getNumElementsFlattened();
- unsigned ToElts = ToMatrixType->getNumElementsFlattened();
- if (FromElts == ToElts)
+ if (FromRows == ToRows && FromCols == ToCols)
ElConv = ICK_Identity;
else
ElConv = ICK_HLSL_Matrix_Truncation;
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
similarity index 100%
rename from clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl
rename to clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
new file mode 100644
index 0000000000000..6a53d2e8ee96c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
@@ -0,0 +1,138 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I34:%.*]] = alloca [12 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I34]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4
+// CHECK-NEXT: ret <12 x i32> [[TMP1]]
+//
+ int3x4 trunc_cast(int4x4 i44) {
+ int3x4 i34 = i44;
+ return i34;
+}
+
+// CHECK-LABEL: define hidden noundef <12 x i32> @_Z11trunc_cast0u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
+// CHECK-NEXT: ret <12 x i32> [[TMP1]]
+//
+ int4x3 trunc_cast0(int4x4 i44) {
+ int4x3 i43 = i44;
+ return i43;
+}
+
+// CHECK-LABEL: define hidden noundef <9 x i32> @_Z11trunc_cast1u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4
+// CHECK-NEXT: ret <9 x i32> [[TMP1]]
+//
+ int3x3 trunc_cast1(int4x4 i44) {
+ int3x3 i33 = i44;
+ return i33;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast2u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+ int3x2 trunc_cast2(int4x4 i44) {
+ int3x2 i32 = i44;
+ return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> @_Z11trunc_cast3u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
+// CHECK-NEXT: ret <6 x i32> [[TMP1]]
+//
+ int2x3 trunc_cast3(int4x4 i44) {
+ int2x3 i23 = i44;
+ return i23;
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> @_Z11trunc_cast4u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+//
+ int2x2 trunc_cast4(int4x4 i44) {
+ int2x2 i22 = i44;
+ return i22;
+}
+
+// CHECK-LABEL: define hidden noundef <2 x i32> @_Z11trunc_cast5u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
+// CHECK-NEXT: ret <2 x i32> [[TMP1]]
+//
+ int2x1 trunc_cast5(int4x4 i44) {
+ int2x1 i21 = i44;
+ return i21;
+}
+
+// CHECK-LABEL: define hidden noundef i32 @_Z11trunc_cast6u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT: [[CAST_MTRUNC:%.*]] = extractelement <16 x i32> [[TMP0]], i32 0
+// CHECK-NEXT: store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I1]], align 4
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+ int trunc_cast6(int4x4 i44) {
+ int i1 = i44;
+ return i1;
+}
diff --git a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
new file mode 100644
index 0000000000000..ebe4db7a9e26e
--- /dev/null
+++ b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
@@ -0,0 +1,287 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -Wconversion -verify -o - %s -DERROR=1
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -Wno-conversion -ast-dump %s | FileCheck %s
+
+// This test verifies floating point type implicit conversion ranks for overload
+// resolution. In HLSL the built-in type ranks are half < float < double. This
+// applies to both scalar and matrix types.
+
+// HLSL allows implicit truncation of types, so it differentiates between
+// promotions (converting to larger types) and conversions (converting to
+// smaller types). Promotions are preferred over conversions. Promotions prefer
+// promoting to the next lowest type in the ranking order. Conversions prefer
+// converting to the next highest type in the ranking order.
+
+void HalfFloatDouble(double2x2 D);
+void HalfFloatDouble(float2x2 F);
+void HalfFloatDouble(half2x2 H);
+
+// CHECK: FunctionDecl {{.*}} used HalfFloatDouble 'void (double2x2)'
+// CHECK: FunctionDecl {{.*}} used HalfFloatDouble 'void (float2x2)'
+// CHECK: FunctionDecl {{.*}} used HalfFloatDouble 'void (half2x2)'
+
+void FloatDouble(double2x2 D); // expected-note {{candidate function}}
+void FloatDouble(float2x2 F); // expected-note {{candidate function}}
+
+// CHECK: FunctionDecl {{.*}} used FloatDouble 'void (double2x2)'
+// CHECK: FunctionDecl {{.*}} used FloatDouble 'void (float2x2)'
+
+void HalfDouble(double2x2 D);
+void HalfDouble(half2x2 H);
+
+// CHECK: FunctionDecl {{.*}} used HalfDouble 'void (double2x2)'
+// CHECK: FunctionDecl {{.*}} used HalfDouble 'void (half2x2)'
+
+void HalfFloat(float2x2 F); // expected-note {{candidate function}}
+void HalfFloat(half2x2 H); // expected-note {{candidate function}}
+
+// CHECK: FunctionDecl {{.*}} used HalfFloat 'void (float2x2)'
+// CHECK: FunctionDecl {{.*}} used HalfFloat 'void (half2x2)'
+
+void Double(double2x2 D);
+void Float(float2x2 F);
+void Half(half2x2 H);
+
+// CHECK: FunctionDecl {{.*}} used Double 'void (double2x2)'
+// CHECK: FunctionDecl {{.*}} used Float 'void (float2x2)'
+// CHECK: FunctionDecl {{.*}} used Half 'void (half2x2)'
+
+// Case 1: A function declared with overloads for half float and double types.
+// (a) When called with half, it will resolve to half because half is an exact
+// match.
+// (b) When called with float it will resolve to float because float is an
+// exact match.
+// (c) When called with double it will resolve to double because it is an
+// exact match.
+
+// CHECK-LABEL: FunctionDecl {{.*}} Case1 'void (half2x2, float2x2, double2x2)'
+void Case1(half2x2 H, float2x2 F, double2x2 D) {
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'HalfFloatDouble' 'void (half2x2)'
+ HalfFloatDouble(H);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'HalfFloatDouble' 'void (float2x2)'
+ HalfFloatDouble(F);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'HalfFloatDouble' 'void (double2x2)'
+ HalfFloatDouble(D);
+}
+
+// Case 2: A function declared with double and float overloads.
+// (a) When called with half, it fails to resolve the ambiguous promotion.
+// (b) When called with float it will resolve to float because float is an
+// exact match.
+// (c) When called with double it will resolve to double because it is an
+// exact match.
+
+// CHECK-LABEL: FunctionDecl {{.*}} Case2 'void (half2x2, float2x2, double2x2)'
+void Case2(half2x2 H, float2x2 F, double2x2 D) {
+#if ERROR
+ FloatDouble(H); // expected-error {{call to 'FloatDouble' is ambiguous}}
+#endif
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'FloatDouble' 'void (float2x2)'
+ FloatDouble(F);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'FloatDouble' 'void (double2x2)'
+ FloatDouble(D);
+}
+
+// Case 3: A function declared with half and double overloads
+// (a) When called with half, it will resolve to half because it is an exact
+// match.
+// (b) When called with float, it will resolve to double because double is a
+// valid promotion.
+// (c) When called with double, it will resolve to double because it is an
+// exact match.
+
+// CHECK-LABEL: FunctionDecl {{.*}} Case3 'void (half2x2, float2x2, double2x2)'
+void Case3(half2x2 H, float2x2 F, double2x2 D) {
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'HalfDouble' 'void (half2x2)'
+ HalfDouble(H);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'HalfDouble' 'void (double2x2)'
+ HalfDouble(F);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'HalfDouble' 'void (double2x2)'
+ HalfDouble(D);
+}
+
+// Case 4: A function declared with half and float overloads.
+// (a) When called with half, it will resolve to half because half is an exact
+// match.
+// (b) When called with float it will resolve to float because float is an
+// exact match.
+// (c) When called with double it fails to resolve the ambiguous conversion.
+
+// CHECK-LABEL: FunctionDecl {{.*}} Case4 'void (half2x2, float2x2, double2x2)'
+void Case4(half2x2 H, float2x2 F, double2x2 D) {
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'HalfFloat' 'void (half2x2)'
+ HalfFloat(H);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'HalfFloat' 'void (float2x2)'
+ HalfFloat(F);
+
+#if ERROR
+ HalfFloat(D); // expected-error{{call to 'HalfFloat' is ambiguous}}
+#endif
+}
+
+// Case 5: A function declared with only a double overload.
+// (a) When called with half, it will resolve to double because double is a
+// valid promotion.
+// (b) When called with float it will resolve to double because double is a
+// valid promotion.
+// (c) When called with double it will resolve to double because it is an
+// exact match.
+
+// CHECK-LABEL: FunctionDecl {{.*}} Case5 'void (half2x2, float2x2, double2x2)'
+void Case5(half2x2 H, float2x2 F, double2x2 D) {
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'Double' 'void (double2x2)'
+ Double(H);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'Double' 'void (double2x2)'
+ Double(F);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (double2x2)' lvalue Function {{.*}} 'Double' 'void (double2x2)'
+ Double(D);
+}
+
+// Case 6: A function declared with only a float overload.
+// (a) When called with half, it will resolve to float because float is a
+// valid promotion.
+// (b) When called with float it will resolve to float because float is an
+// exact match.
+// (c) When called with double it will resolve to float because it is a
+// valid conversion.
+
+// CHECK-LABEL: FunctionDecl {{.*}} Case6 'void (half2x2, float2x2, double2x2)'
+void Case6(half2x2 H, float2x2 F, double2x2 D) {
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'Float' 'void (float2x2)'
+ Float(H);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'Float' 'void (float2x2)'
+ Float(F);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'Float' 'void (float2x2)'
+  Float(D); // TODO: See #168944. Make this an expected warning. {{implicit conversion loses floating-point precision: 'double2x2' (aka 'matrix<double, 2, 2>') to 'matrix<float, 2, 2>' (matrix of 2 'float' values)}}
+}
+
+// Case 7: A function declared with only a half overload.
+// (a) When called with half, it will resolve to half because half is an
+// exact match
+// (b) When called with float it will resolve to half because half is a
+// valid conversion.
+// (c) When called with double it will resolve to half because it is a
+// valid conversion.
+
+// CHECK-LABEL: FunctionDecl {{.*}} Case7 'void (half2x2, float2x2, double2x2)'
+void Case7(half2x2 H, float2x2 F, double2x2 D) {
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'Half' 'void (half2x2)'
+ Half(H);
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'Half' 'void (half2x2)'
+  Half(F); // TODO: See #168944. Make this an expected warning. {{implicit conversion loses floating-point precision: 'float2x2' (aka 'matrix<float, 2, 2>') to 'matrix<half, 2, 2>' (matrix of 4 'half' values)}}
+
+ // CHECK: CallExpr {{.*}} 'void'
+ // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(half2x2)' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr {{.*}} 'void (half2x2)' lvalue Function {{.*}} 'Half' 'void (half2x2)'
+  Half(D); // TODO: See #168944. Make this an expected warning. {{implicit conversion loses floating-point precision: 'double2x2' (aka 'matrix<double, 2, 2>') to 'matrix<half, 2, 2>' (matrix of 4 'half' values)}}
+}
+
+void fn3x2(float3x2) {} // expected-note{{candidate function}}
+void fn2x2(float2x2) {}
+void fn2x2IO(inout float2x2) {}
+void fnI2x2IO(inout int2x2) {}
+
+void matOrVec(float4 F) {}
+void matOrVec(float2x2 F) {}
+
+void matOrVec2(float3 F) {} // expected-note{{candidate function}}
+void matOrVec2(float2x3 F) {} // expected-note{{candidate function}}
+
+export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, float3x2 f32) {
+ int2x2 i22 = f23;
+ //CHECK: VarDecl {{.*}} i22 'int2x2':'matrix<int, 2, 2>' cinit
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2x2':'matrix<int, 2, 2>' <FloatingToIntegral>
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
+#ifdef ERROR
+ int3x2 i32 = f23; // expected-error{{cannot initialize a variable of type 'matrix<int, 3, 2>' with an lvalue of type 'matrix<float, 2, 3>'}}
+ fn3x2(f23); // expected-error{{no matching function for call to 'fn3x2'}}
+#endif
+
+ fn2x2(f23);
+ //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'fn2x2' 'void (float2x2)'
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' <HLSLMatrixTruncation>
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
+
+#ifdef ERROR
+ fn2x2IO(f23); // expected-error{{assigning to 'matrix<[2 * ...], 3>' from incompatible type 'matrix<[2 * ...], 2>'}}
+ fnI2x2IO(f23); // expected-error{{assigning to 'matrix<float, [...], 3>' from incompatible type 'matrix<int, [...], 2>'}}
+#endif
+
+ matOrVec(f23);
+ //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'matOrVec' 'void (float2x2)'
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' <HLSLMatrixTruncation>
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
+
+ matOrVec(f44);
+ //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'matOrVec' 'void (float2x2)'
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' <HLSLMatrixTruncation>
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix<float, 4, 4>' <LValueToRValue>
+
+#ifdef ERROR
+ matOrVec(2.0); // TODO: See #168960 this should be ambiguous once we implement ICK_HLSL_Matrix_Splat.
+#endif
+ matOrVec2(f23);
+ //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)'
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
+
+ matOrVec2(f44);
+ //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)'
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 3>' <HLSLMatrixTruncation>
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix<float, 4, 4>' <LValueToRValue>
+
+ matOrVec2(f33);
+ //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)'
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 3>' <HLSLMatrixTruncation>
+ //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' <LValueToRValue>
+
+#ifdef ERROR
+ matOrVec2(f32); // expected-error{{no matching function for call to 'matOrVec2'}}
+#endif
+}
>From 3e13f119828fbc6783bda5b51269b1b43d4e977e Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 21 Nov 2025 15:25:20 -0500
Subject: [PATCH 04/12] add implicit trunc warnings
---
.../clang/Basic/DiagnosticSemaKinds.td | 6 +++
clang/lib/Sema/SemaChecking.cpp | 14 ++++++
.../MatrixImplicitTruncCastWarnings.hlsl | 50 +++++++++++++++++++
3 files changed, 70 insertions(+)
create mode 100644 clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4a145fd71eedd..a107e7eea8b79 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -4356,6 +4356,9 @@ def warn_param_typestate_mismatch : Warning<
def warn_unknown_sanitizer_ignored : Warning<
"unknown sanitizer '%0' ignored">, InGroup<UnknownSanitizers>;
+def warn_impcast_matrix_scalar : Warning<
+ "implicit conversion turns matrix to scalar: %0 to %1">,
+ InGroup<Conversion>, DefaultIgnore;
def warn_impcast_vector_scalar : Warning<
"implicit conversion turns vector to scalar: %0 to %1">,
InGroup<Conversion>, DefaultIgnore;
@@ -13276,6 +13279,9 @@ def err_hlsl_builtin_scalar_vector_mismatch
def warn_hlsl_impcast_vector_truncation : Warning<
"implicit conversion truncates vector: %0 to %1">, InGroup<Conversion>;
+def warn_hlsl_impcast_matrix_truncation : Warning<
+ "implicit conversion truncates matrix: %0 to %1">, InGroup<Conversion>;
+
def warn_hlsl_availability : Warning<
"%0 is only available %select{|in %4 environment }3on %1 %2 or newer">,
InGroup<HLSLAvailability>, DefaultError;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0ffb4854ba86d..cbb909ebbe9cf 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -37,6 +37,7 @@
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
+#include "clang/AST/TypeBase.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/Basic/AddressSpaces.h"
@@ -12591,6 +12592,19 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
if (auto VecTy = dyn_cast<VectorType>(Target))
Target = VecTy->getElementType().getTypePtr();
+ if (isa<ConstantMatrixType>(Source)) {
+ if (!isa<ConstantMatrixType>(Target)) {
+ return DiagnoseImpCast(*this, E, T, CC, diag::warn_impcast_matrix_scalar);
+ } else if (getLangOpts().HLSL &&
+ Target->castAs<ConstantMatrixType>()->getNumElementsFlattened() <
+ Source->castAs<ConstantMatrixType>()
+ ->getNumElementsFlattened()) {
+ // Diagnose Matrix truncation but don't return. We may also want to
+ // diagnose an element conversion.
+ DiagnoseImpCast(*this, E, T, CC,
+ diag::warn_hlsl_impcast_matrix_truncation);
+ }
+ }
// Strip complex types.
if (isa<ComplexType>(Source)) {
if (!isa<ComplexType>(Target)) {
diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
new file mode 100644
index 0000000000000..360c9f7f31b15
--- /dev/null
+++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -Wconversion -verify %s
+
+export int3x4 trunc_cast(int4x4 i44) {
+ int3x4 i34 = i44;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix<int, 4, 4>') to 'matrix<int, 3, 4>'}}
+ return i34;
+}
+
+export int4x3 trunc_cast0(int4x4 i44) {
+ int4x3 i43 = i44;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix<int, 4, 4>') to 'matrix<int, 4, 3>'}}
+ return i43;
+}
+
+export int3x3 trunc_cast1(int4x4 i44) {
+ int3x3 i33 = i44;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix<int, 4, 4>') to 'matrix<int, 3, 3>'}}
+ return i33;
+}
+
+export int3x2 trunc_cast2(int4x4 i44) {
+ int3x2 i32 = i44;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix<int, 4, 4>') to 'matrix<int, 3, 2>'}}
+ return i32;
+}
+
+export int2x3 trunc_cast3(int4x4 i44) {
+ int2x3 i23 = i44;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix<int, 4, 4>') to 'matrix<int, 2, 3>'}}
+ return i23;
+}
+
+export int2x2 trunc_cast4(int4x4 i44) {
+ int2x2 i22 = i44;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix<int, 4, 4>') to 'matrix<int, 2, 2>'}}
+ return i22;
+}
+
+export int2x1 trunc_cast5(int4x4 i44) {
+ int2x1 i21 = i44;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'int4x4' (aka 'matrix<int, 4, 4>') to 'matrix<int, 2, 1>'}}
+ return i21;
+}
+
+export int trunc_scalar_cast6(int4x4 i44) {
+ int i1 = i44;
+ // expected-warning at -1{{implicit conversion turns matrix to scalar: 'int4x4' (aka 'matrix<int, 4, 4>') to 'int'}}
+ return i1;
+}
+
>From 023ef0eaa0ddf9e68267f5b595a7ffab8d535eb6 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 21 Nov 2025 16:09:19 -0500
Subject: [PATCH 05/12] create a -Wmatrix-conversion diagnostic group. Move
some of the vector warnings to the Vector diagnostic group
---
clang/include/clang/Basic/DiagnosticGroups.td | 3 +++
clang/include/clang/Basic/DiagnosticSemaKinds.td | 8 ++++----
clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl | 6 ++++++
.../BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl | 2 +-
4 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 2fff32bbc4d6c..d99a6b8a568c7 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1061,6 +1061,7 @@ def SuperSubClassMismatch : DiagGroup<"super-class-method-mismatch">;
def OverridingMethodMismatch : DiagGroup<"overriding-method-mismatch">;
def VariadicMacros : DiagGroup<"variadic-macros">;
def VectorConversion : DiagGroup<"vector-conversion">; // clang specific
+def MatrixConversion : DiagGroup<"matrix-conversion">; // clang specific
def VexingParse : DiagGroup<"vexing-parse">;
def VLAUseStaticAssert : DiagGroup<"vla-extension-static-assert">;
def VLACxxExtension : DiagGroup<"vla-cxx-extension", [VLAUseStaticAssert]>;
@@ -1335,6 +1336,8 @@ def : DiagGroup<"int-conversions",
[IntConversion]>; // -Wint-conversions = -Wint-conversion
def : DiagGroup<"vector-conversions",
[VectorConversion]>; // -Wvector-conversions = -Wvector-conversion
+def : DiagGroup<"matrix-conversions",
+ [MatrixConversion]>; // -Wvector-conversions = -Wmatrix-conversion
def : DiagGroup<"unused-local-typedefs", [UnusedLocalTypedef]>;
// -Wunused-local-typedefs = -Wunused-local-typedef
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a107e7eea8b79..97e3418f6d22b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -4358,10 +4358,10 @@ def warn_unknown_sanitizer_ignored : Warning<
def warn_impcast_matrix_scalar : Warning<
"implicit conversion turns matrix to scalar: %0 to %1">,
- InGroup<Conversion>, DefaultIgnore;
+ InGroup<MatrixConversion>, DefaultIgnore;
def warn_impcast_vector_scalar : Warning<
"implicit conversion turns vector to scalar: %0 to %1">,
- InGroup<Conversion>, DefaultIgnore;
+ InGroup<VectorConversion>, DefaultIgnore;
def warn_impcast_complex_scalar : Warning<
"implicit conversion discards imaginary component: %0 to %1">,
InGroup<Conversion>, DefaultIgnore;
@@ -13277,10 +13277,10 @@ def err_hlsl_builtin_scalar_vector_mismatch
"vector type with matching scalar element type%diff{: $ vs $|}2,3">;
def warn_hlsl_impcast_vector_truncation : Warning<
- "implicit conversion truncates vector: %0 to %1">, InGroup<Conversion>;
+ "implicit conversion truncates vector: %0 to %1">, InGroup<VectorConversion>;
def warn_hlsl_impcast_matrix_truncation : Warning<
- "implicit conversion truncates matrix: %0 to %1">, InGroup<Conversion>;
+ "implicit conversion truncates matrix: %0 to %1">, InGroup<MatrixConversion>;
def warn_hlsl_availability : Warning<
"%0 is only available %select{|in %4 environment }3on %1 %2 or newer">,
diff --git a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
index ebe4db7a9e26e..e92688ac1a920 100644
--- a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
+++ b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
@@ -236,6 +236,7 @@ void matOrVec2(float2x3 F) {} // expected-note{{candidate function}}
export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, float3x2 f32) {
int2x2 i22 = f23;
+ // expected-warning at -1{{implicit conversion truncates matrix: 'float2x3' (aka 'matrix<float, 2, 3>') to 'int2x2' (aka 'matrix<int, 2, 2>')}}
//CHECK: VarDecl {{.*}} i22 'int2x2':'matrix<int, 2, 2>' cinit
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2x2':'matrix<int, 2, 2>' <FloatingToIntegral>
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
@@ -245,6 +246,7 @@ export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, float3x2 f32) {
#endif
fn2x2(f23);
+ // expected-warning at -1{{implicit conversion truncates matrix: 'float2x3' (aka 'matrix<float, 2, 3>') to 'matrix<float, 2, 2>'}}
//CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'fn2x2' 'void (float2x2)'
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' <HLSLMatrixTruncation>
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
@@ -255,11 +257,13 @@ export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, float3x2 f32) {
#endif
matOrVec(f23);
+ // expected-warning at -1{{implicit conversion truncates matrix: 'float2x3' (aka 'matrix<float, 2, 3>') to 'matrix<float, 2, 2>'}}
//CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'matOrVec' 'void (float2x2)'
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' <HLSLMatrixTruncation>
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
matOrVec(f44);
+ // expected-warning at -1{{implicit conversion truncates matrix: 'float4x4' (aka 'matrix<float, 4, 4>') to 'matrix<float, 2, 2>'}}
//CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'matOrVec' 'void (float2x2)'
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' <HLSLMatrixTruncation>
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix<float, 4, 4>' <LValueToRValue>
@@ -272,11 +276,13 @@ export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, float3x2 f32) {
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' <LValueToRValue>
matOrVec2(f44);
+ // expected-warning at -1{{implicit conversion truncates matrix: 'float4x4' (aka 'matrix<float, 4, 4>') to 'matrix<float, 2, 3>'}}
//CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)'
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 3>' <HLSLMatrixTruncation>
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix<float, 4, 4>' <LValueToRValue>
matOrVec2(f33);
+ // expected-warning at -1{{implicit conversion truncates matrix: 'float3x3' (aka 'matrix<float, 3, 3>') to 'matrix<float, 2, 3>'}}
//CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 'matOrVec2' 'void (float2x3)'
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 3>' <HLSLMatrixTruncation>
//CHECK-NEXT: ImplicitCastExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' <LValueToRValue>
diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
index 360c9f7f31b15..7d51a2062b3ae 100644
--- a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
+++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -Wconversion -verify %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -Wmatrix-conversion -verify %s
export int3x4 trunc_cast(int4x4 i44) {
int3x4 i34 = i44;
>From 0259c34bd884498bd7d376ad4bf6fdd6e9b4fc28 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 21 Nov 2025 16:46:26 -0500
Subject: [PATCH 06/12] fix CI failure, revert changes to
warn_impcast_matrix_scalar. the VectorConversion group is more invasive than
expected
---
clang/include/clang/Basic/DiagnosticGroups.td | 2 +-
clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +-
clang/test/Driver/autocomplete.c | 2 ++
3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index d99a6b8a568c7..063957e7b18ae 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1337,7 +1337,7 @@ def : DiagGroup<"int-conversions",
def : DiagGroup<"vector-conversions",
[VectorConversion]>; // -Wvector-conversions = -Wvector-conversion
def : DiagGroup<"matrix-conversions",
- [MatrixConversion]>; // -Wvector-conversions = -Wmatrix-conversion
+ [MatrixConversion]>; // -Wmatrix-conversions = -Wmatrix-conversion
def : DiagGroup<"unused-local-typedefs", [UnusedLocalTypedef]>;
// -Wunused-local-typedefs = -Wunused-local-typedef
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 97e3418f6d22b..7af88461738c4 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -4361,7 +4361,7 @@ def warn_impcast_matrix_scalar : Warning<
InGroup<MatrixConversion>, DefaultIgnore;
def warn_impcast_vector_scalar : Warning<
"implicit conversion turns vector to scalar: %0 to %1">,
- InGroup<VectorConversion>, DefaultIgnore;
+ InGroup<Conversion>, DefaultIgnore;
def warn_impcast_complex_scalar : Warning<
"implicit conversion discards imaginary component: %0 to %1">,
InGroup<Conversion>, DefaultIgnore;
diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c
index 4983b71496834..1fd60929751ee 100644
--- a/clang/test/Driver/autocomplete.c
+++ b/clang/test/Driver/autocomplete.c
@@ -117,6 +117,8 @@
// WARNING-NEXT: -Wmany-braces-around-scalar-init
// WARNING-NEXT: -Wmath-errno-enabled-with-veclib
// WARNING-NEXT: -Wmathematical-notation-identifier-extension
+// WARNING-NEXT: -Wmatrix-conversion
+// WARNING-NEXT: -Wmatrix-conversions
// WARNING-NEXT: -Wmax-tokens
// WARNING-NEXT: -Wmax-unsigned-zero
// RUN: %clang --autocomplete=-Wno-invalid-pp- | FileCheck %s -check-prefix=NOWARNING
>From d60c0ce6f61daf76f3a294fca7f52d3f9e81900c Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 25 Nov 2025 15:08:46 -0500
Subject: [PATCH 07/12] address pr comments
---
.../clang/Basic/DiagnosticSemaKinds.td | 2 +-
clang/lib/CodeGen/CGExprScalar.cpp | 54 ++++++++++++++-----
clang/lib/Sema/SemaOverload.cpp | 2 +-
.../BasicFeatures/MatrixElementTypeCast.hlsl | 33 ++++++++++++
.../MatrixExplicitTruncation.hlsl | 12 ++---
.../MatrixImplicitTruncation.hlsl | 12 ++---
6 files changed, 87 insertions(+), 28 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7af88461738c4..17d2a78b96f9f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -4358,7 +4358,7 @@ def warn_unknown_sanitizer_ignored : Warning<
def warn_impcast_matrix_scalar : Warning<
"implicit conversion turns matrix to scalar: %0 to %1">,
- InGroup<MatrixConversion>, DefaultIgnore;
+ InGroup<MatrixConversion>;
def warn_impcast_vector_scalar : Warning<
"implicit conversion turns vector to scalar: %0 to %1">,
InGroup<Conversion>, DefaultIgnore;
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index a9e2ebdffa59a..94f27cd3da8a0 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2426,17 +2426,26 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
assert(LoadList.size() >= MatTy->getNumElementsFlattened() &&
"Flattened type on RHS must have the same number or more elements "
"than vector on LHS.");
+
llvm::Value *V =
CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
// write to V.
- for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
- RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
- assert(RVal.isScalar() &&
- "All flattened source values should be scalars.");
- llvm::Value *Cast =
- CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
- MatTy->getElementType(), Loc);
- V = CGF.Builder.CreateInsertElement(V, Cast, I);
+ unsigned NumCols = MatTy->getNumColumns();
+ unsigned NumRows = MatTy->getNumRows();
+ unsigned ColOffset = NumCols;
+ if (auto *SrcMatTy = SrcVal.getType()->getAs<ConstantMatrixType>())
+ ColOffset = SrcMatTy->getNumColumns();
+ for (unsigned R = 0; R < NumRows; R++) {
+ for (unsigned C = 0; C < NumCols; C++) {
+ unsigned I = R * ColOffset + C;
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
+ assert(RVal.isScalar() &&
+ "All flattened source values should be scalars.");
+ llvm::Value *Cast =
+ CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
+ MatTy->getElementType(), Loc);
+ V = CGF.Builder.CreateInsertElement(V, Cast, I);
+ }
}
return V;
}
@@ -2978,9 +2987,17 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
Value *Mat = Visit(E);
if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
SmallVector<int> Mask;
- unsigned NumElts = MatTy->getNumElementsFlattened();
- for (unsigned I = 0; I != NumElts; ++I)
- Mask.push_back(I);
+ unsigned NumCols = MatTy->getNumColumns();
+ unsigned NumRows = MatTy->getNumRows();
+ unsigned ColOffset = NumCols;
+ if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
+ ColOffset = SrcMatTy->getNumColumns();
+ for (unsigned R = 0; R < NumRows; R++) {
+ for (unsigned C = 0; C < NumCols; C++) {
+ unsigned I = R * ColOffset + C;
+ Mask.push_back(I);
+ }
+ }
return Builder.CreateShuffleVector(Mat, Mask, "trunc");
}
@@ -2991,11 +3008,20 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
RValue RV = CGF.EmitAnyExpr(E);
SourceLocation Loc = CE->getExprLoc();
- assert(RV.isAggregate() && "Not a valid HLSL Elementwise Cast.");
- // RHS is an aggregate
- LValue SrcVal = CGF.MakeAddrLValue(RV.getAggregateAddress(), E->getType());
+ Address SrcAddr = Address::invalid();
+
+ if (RV.isAggregate()) {
+ SrcAddr = RV.getAggregateAddress();
+ } else {
+ SrcAddr = CGF.CreateMemTemp(E->getType(), "hlsl.ewcast.src");
+ LValue TmpLV = CGF.MakeAddrLValue(SrcAddr, E->getType());
+ CGF.EmitStoreThroughLValue(RV, TmpLV);
+ }
+
+ LValue SrcVal = CGF.MakeAddrLValue(SrcAddr, E->getType());
return EmitHLSLElementwiseCast(CGF, SrcVal, DestTy, Loc);
}
+
} // end of switch
llvm_unreachable("unknown scalar cast");
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 1acbda85601af..17c98e9b50aa9 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -2114,7 +2114,7 @@ static bool IsMatrixConversion(Sema &S, QualType FromType, QualType ToType,
ImplicitConversionKind &ICK,
ImplicitConversionKind &ElConv, Expr *From,
bool InOverloadResolution, bool CStyle) {
- // The non HLSL Matrix conversion rules are not clear.
+ // Implicit conversions for matrices are an HLSL feature not present in C/C++.
if (!S.getLangOpts().HLSL)
return false;
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
index 081b8013efcbc..eb79e6e46d83a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -184,3 +184,36 @@ struct Derived : BFields {
void call4(Derived D) {
int2x2 A = (int2x2)D;
}
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z5call5Dv4_f(
+// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[M_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[M2:%.*]] = alloca [4 x float], align 4
+// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 4
+// CHECK-NEXT: store <4 x float> [[M]], ptr [[M_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 16
+// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 16
+// CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[VECEXT]], i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
+// CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[VECEXT1]], i64 1
+// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
+// CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
+// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[VECEXT2]], i64 2
+// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
+// CHECK-NEXT: [[VECEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
+// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[VECEXT3]], i64 3
+// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[M2]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[M2]], align 4
+// CHECK-NEXT: ret <4 x float> [[TMP10]]
+//
+float2x2 call5(float4 v) {
+ float2x2 m = (float2x2)v;
+ return m;
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
index f16d01e1d12ea..f3c4bc496d5a4 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
@@ -25,7 +25,7 @@
// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -42,7 +42,7 @@
// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10>
// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4
// CHECK-NEXT: ret <9 x i32> [[TMP1]]
@@ -59,7 +59,7 @@
// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -76,7 +76,7 @@
// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -93,7 +93,7 @@
// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -110,7 +110,7 @@
// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
// CHECK-NEXT: ret <2 x i32> [[TMP1]]
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
index 6a53d2e8ee96c..e621f68623bd1 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
@@ -25,7 +25,7 @@
// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -42,7 +42,7 @@
// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10>
// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4
// CHECK-NEXT: ret <9 x i32> [[TMP1]]
@@ -59,7 +59,7 @@
// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -76,7 +76,7 @@
// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -93,7 +93,7 @@
// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -110,7 +110,7 @@
// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
// CHECK-NEXT: ret <2 x i32> [[TMP1]]
>From 3a48516c7d051a266efcd36ac4b11f7950ecc885 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 25 Nov 2025 20:55:22 -0500
Subject: [PATCH 08/12] fix element cast indexing to pass offload test
---
clang/lib/CodeGen/CGExprScalar.cpp | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 94f27cd3da8a0..01123b78abba2 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2430,22 +2430,17 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
llvm::Value *V =
CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
// write to V.
- unsigned NumCols = MatTy->getNumColumns();
- unsigned NumRows = MatTy->getNumRows();
- unsigned ColOffset = NumCols;
- if (auto *SrcMatTy = SrcVal.getType()->getAs<ConstantMatrixType>())
- ColOffset = SrcMatTy->getNumColumns();
- for (unsigned R = 0; R < NumRows; R++) {
- for (unsigned C = 0; C < NumCols; C++) {
- unsigned I = R * ColOffset + C;
- RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
- assert(RVal.isScalar() &&
- "All flattened source values should be scalars.");
- llvm::Value *Cast =
- CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
- MatTy->getElementType(), Loc);
- V = CGF.Builder.CreateInsertElement(V, Cast, I);
- }
+ for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
+ unsigned ColMajorIndex =
+ (I % MatTy->getNumRows()) * MatTy->getNumColumns() +
+ (I / MatTy->getNumRows());
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[ColMajorIndex], Loc);
+ assert(RVal.isScalar() &&
+ "All flattened source values should be scalars.");
+ llvm::Value *Cast = CGF.EmitScalarConversion(
+ RVal.getScalarVal(), LoadList[ColMajorIndex].getType(),
+ MatTy->getElementType(), Loc);
+ V = CGF.Builder.CreateInsertElement(V, Cast, I);
}
return V;
}
>From 4fb00914b90736771b011e3764766252e953889c Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Wed, 26 Nov 2025 00:28:33 -0500
Subject: [PATCH 09/12] fix test failure
---
.../BasicFeatures/MatrixElementTypeCast.hlsl | 26 +++++++++----------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
index eb79e6e46d83a..3bd7636212862 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -168,14 +168,14 @@ struct Derived : BFields {
// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[GEP1]], align 8
// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[TMP1]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[CONV]], i64 0
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP2]], align 4
+// CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP3]] to i32
+// CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[CONV4]], i64 1
// CHECK-NEXT: [[BF_LOAD:%.*]] = load i24, ptr [[E]], align 1
// CHECK-NEXT: [[BF_SHL:%.*]] = shl i24 [[BF_LOAD]], 9
// CHECK-NEXT: [[BF_ASHR:%.*]] = ashr i24 [[BF_SHL]], 9
// CHECK-NEXT: [[BF_CAST:%.*]] = sext i24 [[BF_ASHR]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[BF_CAST]], i64 1
-// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP2]], align 4
-// CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP4]] to i32
-// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[CONV4]], i64 2
+// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BF_CAST]], i64 2
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP3]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 3
// CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[A]], align 4
@@ -186,14 +186,14 @@ void call4(Derived D) {
}
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z5call5Dv4_f(
-// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[V:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[M_ADDR:%.*]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[M2:%.*]] = alloca [4 x float], align 4
+// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[M:%.*]] = alloca [4 x float], align 4
// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca <4 x float>, align 16
// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 4
-// CHECK-NEXT: store <4 x float> [[M]], ptr [[M_ADDR]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 16
+// CHECK-NEXT: store <4 x float> [[V]], ptr [[V_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V_ADDR]], align 16
// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 16
// CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 4
@@ -201,16 +201,16 @@ void call4(Derived D) {
// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[VECEXT]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
-// CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+// CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[VECEXT1]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
-// CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
+// CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[VECEXT2]], i64 2
// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
// CHECK-NEXT: [[VECEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[VECEXT3]], i64 3
-// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[M2]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[M2]], align 4
+// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[M]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[M]], align 4
// CHECK-NEXT: ret <4 x float> [[TMP10]]
//
float2x2 call5(float4 v) {
>From e6b4ab6c66c65c54d5ec10cd4bb7494d35093dd2 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 2 Dec 2025 12:15:09 -0500
Subject: [PATCH 10/12] address pr comments
---
clang/lib/CodeGen/CGExprScalar.cpp | 32 ++++++++-----------
clang/lib/Sema/SemaChecking.cpp | 17 +++++-----
.../MatrixElementOverloadResolution.hlsl | 4 +--
.../MatrixImplicitTruncCastWarnings.hlsl | 2 +-
4 files changed, 26 insertions(+), 29 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 01123b78abba2..0fc11f641d7f2 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2429,7 +2429,7 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
llvm::Value *V =
CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
- // write to V.
+ // V is an allocated temporary to build the truncated matrix into.
for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
unsigned ColMajorIndex =
(I % MatTy->getNumRows()) * MatTy->getNumColumns() +
@@ -2977,27 +2977,23 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc");
}
case CK_HLSLMatrixTruncation: {
- assert((DestTy->isMatrixType() || DestTy->isBuiltinType()) &&
+ assert((DestTy->isConstantMatrixType() || DestTy->isBuiltinType()) &&
"Destination type must be a matrix or builtin type.");
Value *Mat = Visit(E);
- if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
- SmallVector<int> Mask;
- unsigned NumCols = MatTy->getNumColumns();
- unsigned NumRows = MatTy->getNumRows();
- unsigned ColOffset = NumCols;
- if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
- ColOffset = SrcMatTy->getNumColumns();
- for (unsigned R = 0; R < NumRows; R++) {
- for (unsigned C = 0; C < NumCols; C++) {
- unsigned I = R * ColOffset + C;
- Mask.push_back(I);
- }
+ auto *MatTy = DestTy->getAs<ConstantMatrixType>();
+ SmallVector<int> Mask;
+ unsigned NumCols = MatTy->getNumColumns();
+ unsigned NumRows = MatTy->getNumRows();
+ unsigned ColOffset = NumCols;
+ if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
+ ColOffset = SrcMatTy->getNumColumns();
+ for (unsigned R = 0; R < NumRows; R++) {
+ for (unsigned C = 0; C < NumCols; C++) {
+ unsigned I = R * ColOffset + C;
+ Mask.push_back(I);
}
-
- return Builder.CreateShuffleVector(Mat, Mask, "trunc");
}
- llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
- return Builder.CreateExtractElement(Mat, Zero, "cast.mtrunc");
+ return Builder.CreateShuffleVector(Mat, Mask, "trunc");
}
case CK_HLSLElementwiseCast: {
RValue RV = CGF.EmitAnyExpr(E);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cbb909ebbe9cf..72202f1f67ab4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -12571,9 +12571,10 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
if (SourceMgr.isInSystemMacro(CC))
return;
return DiagnoseImpCast(*this, E, T, CC, diag::warn_impcast_vector_scalar);
- } else if (getLangOpts().HLSL &&
- Target->castAs<VectorType>()->getNumElements() <
- Source->castAs<VectorType>()->getNumElements()) {
+ }
+ if (getLangOpts().HLSL &&
+ Target->castAs<VectorType>()->getNumElements() <
+ Source->castAs<VectorType>()->getNumElements()) {
// Diagnose vector truncation but don't return. We may also want to
// diagnose an element conversion.
DiagnoseImpCast(*this, E, T, CC,
@@ -12593,12 +12594,12 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
Target = VecTy->getElementType().getTypePtr();
if (isa<ConstantMatrixType>(Source)) {
- if (!isa<ConstantMatrixType>(Target)) {
+ if (Target->isScalarType())
return DiagnoseImpCast(*this, E, T, CC, diag::warn_impcast_matrix_scalar);
- } else if (getLangOpts().HLSL &&
- Target->castAs<ConstantMatrixType>()->getNumElementsFlattened() <
- Source->castAs<ConstantMatrixType>()
- ->getNumElementsFlattened()) {
+
+ if (getLangOpts().HLSL &&
+ Target->castAs<ConstantMatrixType>()->getNumElementsFlattened() <
+ Source->castAs<ConstantMatrixType>()->getNumElementsFlattened()) {
// Diagnose Matrix truncation but don't return. We may also want to
// diagnose an element conversion.
DiagnoseImpCast(*this, E, T, CC,
diff --git a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
index e92688ac1a920..04149e176edbd 100644
--- a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
+++ b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -Wconversion -verify -o - %s -DERROR=1
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -Wno-conversion -ast-dump %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -verify -o - %s -DERROR=1
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -fnative-half-type -finclude-default-header -ast-dump %s | FileCheck %s
// This test verifies floating point type implicit conversion ranks for overload
// resolution. In HLSL the built-in type ranks are half < float < double. This
diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
index 7d51a2062b3ae..2c50b957578ec 100644
--- a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
+++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -Wmatrix-conversion -verify %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s
export int3x4 trunc_cast(int4x4 i44) {
int3x4 i34 = i44;
>From fef4b65f095963bfc9ae933675e039981adce8f6 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 2 Dec 2025 12:42:09 -0500
Subject: [PATCH 11/12] fix to support BuiltinType
---
clang/lib/CodeGen/CGExprScalar.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 0fc11f641d7f2..f95a494732ef9 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2980,10 +2980,13 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
assert((DestTy->isConstantMatrixType() || DestTy->isBuiltinType()) &&
"Destination type must be a matrix or builtin type.");
Value *Mat = Visit(E);
- auto *MatTy = DestTy->getAs<ConstantMatrixType>();
+ unsigned NumCols = 1;
+ unsigned NumRows = 1;
SmallVector<int> Mask;
- unsigned NumCols = MatTy->getNumColumns();
- unsigned NumRows = MatTy->getNumRows();
+ if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
+ NumCols = MatTy->getNumColumns();
+ NumRows = MatTy->getNumRows();
+ }
unsigned ColOffset = NumCols;
if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
ColOffset = SrcMatTy->getNumColumns();
>From 71a3ece410f75e201e94508b4a12b34b61afda9a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 2 Dec 2025 13:13:18 -0500
Subject: [PATCH 12/12] revert changes to CGExprScalar.cpp as the suggestion was
a misunderstanding. fix up some typos
---
clang/lib/CodeGen/CGExprScalar.cpp | 31 +++++++++++++++---------------
clang/lib/Sema/SemaChecking.cpp | 2 +-
clang/lib/Sema/SemaOverload.cpp | 2 +-
3 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index f95a494732ef9..769bc37b0e131 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2977,26 +2977,27 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc");
}
case CK_HLSLMatrixTruncation: {
- assert((DestTy->isConstantMatrixType() || DestTy->isBuiltinType()) &&
+ assert((DestTy->isMatrixType() || DestTy->isBuiltinType()) &&
"Destination type must be a matrix or builtin type.");
Value *Mat = Visit(E);
- unsigned NumCols = 1;
- unsigned NumRows = 1;
- SmallVector<int> Mask;
if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
- NumCols = MatTy->getNumColumns();
- NumRows = MatTy->getNumRows();
- }
- unsigned ColOffset = NumCols;
- if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
- ColOffset = SrcMatTy->getNumColumns();
- for (unsigned R = 0; R < NumRows; R++) {
- for (unsigned C = 0; C < NumCols; C++) {
- unsigned I = R * ColOffset + C;
- Mask.push_back(I);
+ SmallVector<int> Mask;
+ unsigned NumCols = MatTy->getNumColumns();
+ unsigned NumRows = MatTy->getNumRows();
+ unsigned ColOffset = NumCols;
+ if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
+ ColOffset = SrcMatTy->getNumColumns();
+ for (unsigned R = 0; R < NumRows; R++) {
+ for (unsigned C = 0; C < NumCols; C++) {
+ unsigned I = R * ColOffset + C;
+ Mask.push_back(I);
+ }
}
+
+ return Builder.CreateShuffleVector(Mat, Mask, "trunc");
}
- return Builder.CreateShuffleVector(Mat, Mask, "trunc");
+ llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
+ return Builder.CreateExtractElement(Mat, Zero, "cast.mtrunc");
}
case CK_HLSLElementwiseCast: {
RValue RV = CGF.EmitAnyExpr(E);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 72202f1f67ab4..58de9fe48162b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -12590,7 +12590,7 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
Source = cast<VectorType>(Source)->getElementType().getTypePtr();
Target = cast<VectorType>(Target)->getElementType().getTypePtr();
}
- if (auto VecTy = dyn_cast<VectorType>(Target))
+ if (const auto *VecTy = dyn_cast<VectorType>(Target))
Target = VecTy->getElementType().getTypePtr();
if (isa<ConstantMatrixType>(Source)) {
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 17c98e9b50aa9..9a3a78164f0f8 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -2121,7 +2121,7 @@ static bool IsMatrixConversion(Sema &S, QualType FromType, QualType ToType,
auto *ToMatrixType = ToType->getAs<ConstantMatrixType>();
auto *FromMatrixType = FromType->getAs<ConstantMatrixType>();
- // If both arguments are vectors, handle possible vector truncation and
+ // If both arguments are matrices, handle possible matrix truncation and
// element conversion.
if (ToMatrixType && FromMatrixType) {
unsigned FromCols = FromMatrixType->getNumColumns();
More information about the cfe-commits
mailing list