[clang] [X86][Clang] Allow constexpr evaluation of F16C CVTPS2PH intrinsics (PR #162295)
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 11 02:50:29 PST 2025
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/162295
>From eec4aebc63665d0b2fd4e07d42bd62e268d1f69f Mon Sep 17 00:00:00 2001
From: ericxu233 <xuhanyang5 at gmail.com>
Date: Sun, 5 Oct 2025 01:41:40 -0400
Subject: [PATCH 1/4] [X86][Clang] Allow constexpr evaluation of F16C CVTPS2PH
intrinsics
---
clang/include/clang/Basic/BuiltinsX86.td | 6 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 81 ++++++++++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 65 +++++++++++++++++++
clang/test/CodeGen/X86/f16c-builtins.c | 57 +++++++++++++++++
4 files changed, 207 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4388c09423a21..b125f986101d3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -743,11 +743,13 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<2, long long int>, _Vector<4, int>, _Constant char)">;
}
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "f16c",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int)">;
}
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "f16c",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9991e365addb8..ace2b6116d8a6 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3475,6 +3475,83 @@ static bool interp__builtin_ia32_shuffle_generic(
return true;
}
+static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ // Arguments are: vector of floats, rounding immediate
+ assert(Call->getNumArgs() == 2);
+
+ APSInt Imm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ assert(Src.getFieldDesc()->isPrimitiveArray());
+ assert(Dst.getFieldDesc()->isPrimitiveArray());
+
+ const auto *SrcVTy = Call->getArg(0)->getType()->castAs<VectorType>();
+ unsigned SrcNumElems = SrcVTy->getNumElements();
+ const auto *DstVTy = Call->getType()->castAs<VectorType>();
+ unsigned DstNumElems = DstVTy->getNumElements();
+
+ const llvm::fltSemantics &HalfSem =
+ S.getASTContext().getFloatTypeSemantics(S.getASTContext().HalfTy);
+
+ // imm[2] == 1 means use MXCSR rounding mode.
+ // In that case, we can only evaluate if the conversion is exact.
+ int ImmVal = Imm.getZExtValue();
+ bool UseMXCSR = (ImmVal & 4) != 0;
+
+ llvm::RoundingMode RM;
+ if (!UseMXCSR) {
+ switch (ImmVal & 3) {
+ case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
+ case 1: RM = llvm::RoundingMode::TowardNegative; break;
+ case 2: RM = llvm::RoundingMode::TowardPositive; break;
+ case 3: RM = llvm::RoundingMode::TowardZero; break;
+ default: llvm_unreachable("Invalid immediate rounding mode");
+ }
+ } else {
+ // For MXCSR, we must check for exactness. We can use any rounding mode
+ // for the trial conversion since the result is the same if it's exact.
+ RM = llvm::RoundingMode::NearestTiesToEven;
+ }
+
+ QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
+ PrimType DstElemT = *S.getContext().classify(DstElemQT);
+ bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType();
+
+ for (unsigned I = 0; I < SrcNumElems; ++I) {
+ Floating SrcVal = Src.elem<Floating>(I);
+ APFloat DstVal = SrcVal.getAPFloat();
+
+ bool LostInfo;
+ APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo);
+
+ if (UseMXCSR && St != APFloat::opOK) {
+ S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_dynamic_rounding);
+ return false;
+ }
+
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+ // FIX: Extract the integer value before calling 'from'.
+ uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
+ Dst.elem<T>(I) = T::from(RawBits);
+ });
+ }
+
+ // Zero out remaining elements if the destination has more elements
+ // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
+ if (DstNumElems > SrcNumElems) {
+ for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+ Dst.elem<T>(I) = T::from(0);
+ });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4632,6 +4709,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vinsertf128_si256:
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
+
+ case clang::X86::BI__builtin_ia32_vcvtps2ph:
+ case clang::X86::BI__builtin_ia32_vcvtps2ph256:
+ return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v16qi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 8fab6efafb983..859e88b03e4a3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13175,6 +13175,71 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+
+ case clang::X86::BI__builtin_ia32_vcvtps2ph:
+ case clang::X86::BI__builtin_ia32_vcvtps2ph256: {
+ APValue SrcVec;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SrcVec))
+ return false;
+
+ APSInt Imm;
+ if (!EvaluateInteger(E->getArg(1), Imm, Info))
+ return false;
+
+ assert(SrcVec.isVector());
+
+ const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>();
+ unsigned SrcNumElems = SrcVTy->getNumElements();
+ const auto *DstVTy = E->getType()->castAs<VectorType>();
+ unsigned DstNumElems = DstVTy->getNumElements();
+ QualType DstElemTy = DstVTy->getElementType();
+
+ const llvm::fltSemantics &HalfSem = Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy);
+
+ int ImmVal = Imm.getZExtValue();
+ bool UseMXCSR = (ImmVal & 4) != 0;
+
+ llvm::RoundingMode RM;
+ if (!UseMXCSR) {
+ switch (ImmVal & 3) {
+ case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
+ case 1: RM = llvm::RoundingMode::TowardNegative; break;
+ case 2: RM = llvm::RoundingMode::TowardPositive; break;
+ case 3: RM = llvm::RoundingMode::TowardZero; break;
+ default: llvm_unreachable("Invalid immediate rounding mode");
+ }
+ } else {
+ RM = llvm::RoundingMode::NearestTiesToEven;
+ }
+
+ SmallVector<APValue, 8> ResultElements;
+ ResultElements.reserve(DstNumElems);
+
+ for (unsigned I = 0; I < SrcNumElems; ++I) {
+ APFloat SrcVal = SrcVec.getVectorElt(I).getFloat();
+
+ bool LostInfo;
+ APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo);
+
+ if (UseMXCSR && St != APFloat::opOK) {
+ Info.FFDiag(E, diag::note_constexpr_dynamic_rounding);
+ return false;
+ }
+
+ APSInt DstInt(SrcVal.bitcastToAPInt(),
+ DstElemTy->isUnsignedIntegerOrEnumerationType());
+ ResultElements.push_back(APValue(DstInt));
+ }
+
+ if (DstNumElems > SrcNumElems) {
+ APSInt Zero = Info.Ctx.MakeIntValue(0, DstElemTy);
+ for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
+ ResultElements.push_back(APValue(Zero));
+ }
+ }
+
+ return Success(ResultElements, E);
+ }
}
}
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index c08ef76d56981..de35c16c75ab4 100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -67,3 +67,60 @@ __m128i test_mm256_cvtps_ph(__m256 a) {
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
return _mm256_cvtps_ph(a, 0);
}
+
+// A value exactly halfway between 1.0 and the next representable FP16 number.
+// In binary, its significand ends in ...000, followed by a tie-bit 1.
+#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
+
+//
+// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
+//
+// Test values: -2.5f, 1.123f, POS_HALFWAY
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+ 0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+ 0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
+));
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
+));
+
+//
+// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
+//
+// Test values: -2.5f, 1.123f, POS_HALFWAY
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+ 0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+ 0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
+));
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+ 0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
+));
+
+//
+// Tests for Exact Dynamic Rounding
+//
+// Test that dynamic rounding SUCCEEDS for exactly representable values.
+// We use _MM_FROUND_CUR_DIRECTION (value 4) to specify dynamic rounding.
+// Inputs: -2.5f, 0.125f, -16.0f are all exactly representable in FP16.
+TEST_CONSTEXPR(match_v8hi(
+ __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
+ 0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
+));
\ No newline at end of file
>From c1b4300ded98f64df737adc103644c788bf4491d Mon Sep 17 00:00:00 2001
From: ericxu233 <xuhanyang5 at gmail.com>
Date: Tue, 7 Oct 2025 10:33:02 -0400
Subject: [PATCH 2/4] clang format
---
clang/include/clang/Basic/BuiltinsX86.td | 4 ++--
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 +++++++++++++++---------
clang/lib/AST/ExprConstant.cpp | 26 +++++++++++++-------
3 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b125f986101d3..5165a4d99d306 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -743,12 +743,12 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<2, long long int>, _Vector<4, int>, _Constant char)">;
}
-let Features = "f16c",
+let Features = "f16c",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int)">;
}
-let Features = "f16c",
+let Features = "f16c",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ace2b6116d8a6..0ff08e11018ad 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3503,11 +3503,20 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
llvm::RoundingMode RM;
if (!UseMXCSR) {
switch (ImmVal & 3) {
- case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
- case 1: RM = llvm::RoundingMode::TowardNegative; break;
- case 2: RM = llvm::RoundingMode::TowardPositive; break;
- case 3: RM = llvm::RoundingMode::TowardZero; break;
- default: llvm_unreachable("Invalid immediate rounding mode");
+ case 0:
+ RM = llvm::RoundingMode::NearestTiesToEven;
+ break;
+ case 1:
+ RM = llvm::RoundingMode::TowardNegative;
+ break;
+ case 2:
+ RM = llvm::RoundingMode::TowardPositive;
+ break;
+ case 3:
+ RM = llvm::RoundingMode::TowardZero;
+ break;
+ default:
+ llvm_unreachable("Invalid immediate rounding mode");
}
} else {
// For MXCSR, we must check for exactness. We can use any rounding mode
@@ -3527,7 +3536,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
APFloat::opStatus St = DstVal.convert(HalfSem, RM, &LostInfo);
if (UseMXCSR && St != APFloat::opOK) {
- S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_dynamic_rounding);
+ S.FFDiag(S.Current->getSource(OpPC),
+ diag::note_constexpr_dynamic_rounding);
return false;
}
@@ -3542,12 +3552,10 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
// (e.g., vcvtps2ph converting 4 floats to 8 shorts).
if (DstNumElems > SrcNumElems) {
for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
- INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
- Dst.elem<T>(I) = T::from(0);
- });
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
}
}
-
+
Dst.initializeAllElements();
return true;
}
@@ -4709,7 +4717,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vinsertf128_si256:
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
-
+
case clang::X86::BI__builtin_ia32_vcvtps2ph:
case clang::X86::BI__builtin_ia32_vcvtps2ph256:
return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 859e88b03e4a3..0c01d38fd5739 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13194,7 +13194,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned DstNumElems = DstVTy->getNumElements();
QualType DstElemTy = DstVTy->getElementType();
- const llvm::fltSemantics &HalfSem = Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy);
+ const llvm::fltSemantics &HalfSem =
+ Info.Ctx.getFloatTypeSemantics(Info.Ctx.HalfTy);
int ImmVal = Imm.getZExtValue();
bool UseMXCSR = (ImmVal & 4) != 0;
@@ -13202,11 +13203,20 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
llvm::RoundingMode RM;
if (!UseMXCSR) {
switch (ImmVal & 3) {
- case 0: RM = llvm::RoundingMode::NearestTiesToEven; break;
- case 1: RM = llvm::RoundingMode::TowardNegative; break;
- case 2: RM = llvm::RoundingMode::TowardPositive; break;
- case 3: RM = llvm::RoundingMode::TowardZero; break;
- default: llvm_unreachable("Invalid immediate rounding mode");
+ case 0:
+ RM = llvm::RoundingMode::NearestTiesToEven;
+ break;
+ case 1:
+ RM = llvm::RoundingMode::TowardNegative;
+ break;
+ case 2:
+ RM = llvm::RoundingMode::TowardPositive;
+ break;
+ case 3:
+ RM = llvm::RoundingMode::TowardZero;
+ break;
+ default:
+ llvm_unreachable("Invalid immediate rounding mode");
}
} else {
RM = llvm::RoundingMode::NearestTiesToEven;
@@ -13217,7 +13227,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
for (unsigned I = 0; I < SrcNumElems; ++I) {
APFloat SrcVal = SrcVec.getVectorElt(I).getFloat();
-
+
bool LostInfo;
APFloat::opStatus St = SrcVal.convert(HalfSem, RM, &LostInfo);
@@ -13225,7 +13235,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
Info.FFDiag(E, diag::note_constexpr_dynamic_rounding);
return false;
}
-
+
APSInt DstInt(SrcVal.bitcastToAPInt(),
DstElemTy->isUnsignedIntegerOrEnumerationType());
ResultElements.push_back(APValue(DstInt));
>From 1141c401db423efb879ee6933ade2ff0d47cf5e4 Mon Sep 17 00:00:00 2001
From: ericxu233 <xuhanyang5 at gmail.com>
Date: Thu, 23 Oct 2025 17:38:02 -0400
Subject: [PATCH 3/4] Finished addressing review comments
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++--
clang/lib/AST/ExprConstant.cpp | 2 -
clang/test/CodeGen/X86/f16c-builtins.c | 54 ++++++++++++------------
3 files changed, 31 insertions(+), 33 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ff08e11018ad..c33b395e28ec2 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3526,9 +3526,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
PrimType DstElemT = *S.getContext().classify(DstElemQT);
- bool DstIsUnsigned = DstElemQT->isUnsignedIntegerOrEnumerationType();
- for (unsigned I = 0; I < SrcNumElems; ++I) {
+ for (unsigned I = 0; I != SrcNumElems; ++I) {
Floating SrcVal = Src.elem<Floating>(I);
APFloat DstVal = SrcVal.getAPFloat();
@@ -3542,7 +3541,8 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
}
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
- // FIX: Extract the integer value before calling 'from'.
+ // Convert the destination value's bit pattern to an unsigned integer,
+ // then reconstruct the element using the target type's 'from' method.
uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
Dst.elem<T>(I) = T::from(RawBits);
});
@@ -3551,7 +3551,7 @@ static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
// Zero out remaining elements if the destination has more elements
// (e.g., vcvtps2ph converting 4 floats to 8 shorts).
if (DstNumElems > SrcNumElems) {
- for (unsigned I = SrcNumElems; I < DstNumElems; ++I) {
+ for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
}
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0c01d38fd5739..7e15fbd1e7805 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13186,8 +13186,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!EvaluateInteger(E->getArg(1), Imm, Info))
return false;
- assert(SrcVec.isVector());
-
const auto *SrcVTy = E->getArg(0)->getType()->castAs<VectorType>();
unsigned SrcNumElems = SrcVTy->getNumElements();
const auto *DstVTy = E->getType()->castAs<VectorType>();
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index de35c16c75ab4..47ff06b270541 100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -46,71 +46,65 @@ __m128 test_mm_cvtph_ps(__m128i a) {
return _mm_cvtph_ps(a);
}
-__m256 test_mm256_cvtph_ps(__m128i a) {
- // CHECK-LABEL: test_mm256_cvtph_ps
- // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
- return _mm256_cvtph_ps(a);
-}
-TEST_CONSTEXPR(match_m256(
- _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)),
- 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
-));
-
__m128i test_mm_cvtps_ph(__m128 a) {
// CHECK-LABEL: test_mm_cvtps_ph
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
return _mm_cvtps_ph(a, 0);
}
-__m128i test_mm256_cvtps_ph(__m256 a) {
- // CHECK-LABEL: test_mm256_cvtps_ph
- // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
- return _mm256_cvtps_ph(a, 0);
-}
-
// A value exactly halfway between 1.0 and the next representable FP16 number.
// In binary, its significand ends in ...000, followed by a tie-bit 1.
#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
//
-// __builtin_ia32_vcvtps2ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
+// _mm_cvtps_ph (128-bit, 4 floats -> 8 shorts, 4 are zero-padded)
//
// Test values: -2.5f, 1.123f, POS_HALFWAY
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
0xC100, 0x3C7E, 0x3C00, 0x0000, 0, 0, 0, 0
));
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
));
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
0xC100, 0x3C7E, 0x3C01, 0x0000, 0, 0, 0, 0
));
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+ _mm_cvtps_ph(_mm_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
0xC100, 0x3C7D, 0x3C00, 0x0000, 0, 0, 0, 0
));
+__m256 test_mm256_cvtph_ps(__m128i a) {
+ // CHECK-LABEL: test_mm256_cvtph_ps
+ // CHECK: fpext <8 x half> %{{.*}} to <8 x float>
+ return _mm256_cvtph_ps(a);
+}
+TEST_CONSTEXPR(match_m256(
+ _mm256_cvtph_ps(_mm_setr_epi16(0x3C00, 0x4000, 0x4200, 0x4400, 0x4500, 0x3800, 0xC000, 0x0000)),
+ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.5f, -2.0f, 0.0f
+));
+
//
-// __builtin_ia32_vcvtps2ph256 (256-bit, 8 floats -> 8 shorts)
+// _mm256_cvtps_ph (256-bit, 8 floats -> 8 shorts)
//
// Test values: -2.5f, 1.123f, POS_HALFWAY
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEAREST_INT),
0xC100, 0x3C7E, 0x3C00, 0x0000, 0xC100, 0x3C7E, 0x3C00, 0x0000
));
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_NEG_INF),
0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
));
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_POS_INF),
0xC100, 0x3C7E, 0x3C01, 0x0000, 0xC100, 0x3C7E, 0x3C01, 0x0000
));
TEST_CONSTEXPR(match_v8hi(
- __builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
+ _mm256_cvtps_ph(_mm256_setr_ps(-2.5f, 1.123f, POS_HALFWAY, 0.0f, -2.5f, 1.123f, POS_HALFWAY, 0.0f), _MM_FROUND_TO_ZERO),
0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
));
@@ -123,4 +117,10 @@ TEST_CONSTEXPR(match_v8hi(
TEST_CONSTEXPR(match_v8hi(
__builtin_ia32_vcvtps2ph256(_mm256_setr_ps(-2.5f, 0.125f, -16.0f, 0.0f, -2.5f, 0.125f, -16.0f, 0.0f), _MM_FROUND_CUR_DIRECTION),
0xC100, 0x3000, 0xCC00, 0x0000, 0xC100, 0x3000, 0xCC00, 0x0000
-));
\ No newline at end of file
+));
+
+__m128i test_mm256_cvtps_ph(__m256 a) {
+ // CHECK-LABEL: test_mm256_cvtps_ph
+ // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
+ return _mm256_cvtps_ph(a, 0);
+}
\ No newline at end of file
>From 1308416e6804b510de6c97669be33223fbad943c Mon Sep 17 00:00:00 2001
From: ericxu233 <xuhanyang5 at gmail.com>
Date: Fri, 7 Nov 2025 16:20:46 -0500
Subject: [PATCH 4/4] Address review comments
---
clang/test/CodeGen/X86/f16c-builtins.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index 47ff06b270541..2ae4bc857b431 100755
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -46,12 +46,6 @@ __m128 test_mm_cvtph_ps(__m128i a) {
return _mm_cvtph_ps(a);
}
-__m128i test_mm_cvtps_ph(__m128 a) {
- // CHECK-LABEL: test_mm_cvtps_ph
- // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
- return _mm_cvtps_ph(a, 0);
-}
-
// A value exactly halfway between 1.0 and the next representable FP16 number.
// In binary, its significand ends in ...000, followed by a tie-bit 1.
#define POS_HALFWAY (1.0f + 0.00048828125f) // 1.0 + 2^-11, a tie-breaking case
@@ -108,6 +102,12 @@ TEST_CONSTEXPR(match_v8hi(
0xC100, 0x3C7D, 0x3C00, 0x0000, 0xC100, 0x3C7D, 0x3C00, 0x0000
));
+__m128i test_mm_cvtps_ph(__m128 a) {
+ // CHECK-LABEL: test_mm_cvtps_ph
+ // CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %{{.*}}, i32 0)
+ return _mm_cvtps_ph(a, 0);
+}
+
//
// Tests for Exact Dynamic Rounding
//
@@ -123,4 +123,4 @@ __m128i test_mm256_cvtps_ph(__m256 a) {
// CHECK-LABEL: test_mm256_cvtps_ph
// CHECK: call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %{{.*}}, i32 0)
return _mm256_cvtps_ph(a, 0);
-}
\ No newline at end of file
+}
More information about the cfe-commits mailing list