[clang] [Clang][x86]: allow PCLMULQDQ intrinsics to be used in constexpr (PR #169214)
Ahmed Nour via cfe-commits
cfe-commits at lists.llvm.org
Thu Dec 11 12:47:25 PST 2025
https://github.com/ahmednoursphinx updated https://github.com/llvm/llvm-project/pull/169214
>From 60fac68bef81335aa12e2faa7e364bb647a51872 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 14:21:51 +0200
Subject: [PATCH 01/15] [Clang][x86]: allow PCLMULQDQ intrinsics to be used in
constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 6 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 72 ++++++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 63 +++++++++++++++++
clang/test/CodeGen/X86/pclmul-builtins.c | 18 ++++-
clang/test/CodeGen/X86/vpclmulqdq-builtins.c | 13 ++++
5 files changed, 168 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index f6069fdc5707a..1eee50a441e31 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -444,15 +444,15 @@ let Features = "avx512f,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth
def vgf2p8mulb_v64qi : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "pclmul", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "pclmul", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pclmulqdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant char)">;
}
-let Features = "vpclmulqdq", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pclmulqdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant char)">;
}
-let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 83e40f64fd979..ef740c04c83da 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2745,6 +2745,73 @@ static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
+ // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
+ // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
+ assert(Call->getArg(0)->getType()->isVectorType() &&
+ Call->getArg(1)->getType()->isVectorType());
+
+ // Extract imm8 argument
+ APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
+ unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
+ bool SelectUpperA = (Imm8Val & 0x01) != 0;
+ bool SelectUpperB = (Imm8Val & 0x10) != 0;
+
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VT->getElementType());
+ unsigned NumElems = VT->getNumElements();
+ const auto *DestVT = Call->getType()->castAs<VectorType>();
+ PrimType DestElemT = *S.getContext().classify(DestVT->getElementType());
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+ // Process each 128-bit lane (2 elements at a time)
+ for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
+ APSInt A0, A1, B0, B1;
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ A0 = LHS.elem<T>(Lane + 0).toAPSInt();
+ A1 = LHS.elem<T>(Lane + 1).toAPSInt();
+ B0 = RHS.elem<T>(Lane + 0).toAPSInt();
+ B1 = RHS.elem<T>(Lane + 1).toAPSInt();
+ });
+
+ // Select the appropriate 64-bit values based on imm8
+ APSInt A = SelectUpperA ? A1 : A0;
+ APSInt B = SelectUpperB ? B1 : B0;
+
+ // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
+ // This multiplies two 64-bit values to produce a 128-bit result
+ APInt AVal = A.getValue().zextOrTrunc(64);
+ APInt BVal = B.getValue().zextOrTrunc(64);
+ APInt Result(128, 0);
+
+ // For each bit in A, if set, XOR B shifted left by that bit position
+ for (unsigned i = 0; i < 64; ++i) {
+ if (AVal[i]) {
+ APInt ShiftedB = BVal.zext(128) << i;
+ Result ^= ShiftedB;
+ }
+ }
+
+ // Split the 128-bit result into two 64-bit halves
+ APSInt ResultLow(Result.extractBits(64, 0), DestUnsigned);
+ APSInt ResultHigh(Result.extractBits(64, 64), DestUnsigned);
+
+ INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
+ Dst.elem<T>(Lane + 0) = static_cast<T>(ResultLow);
+ Dst.elem<T>(Lane + 1) = static_cast<T>(ResultHigh);
+ });
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_elementwise_triop_fp(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APFloat(const APFloat &, const APFloat &,
@@ -4366,6 +4433,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return llvm::APIntOps::muluExtended(LoLHS, LoRHS);
});
+ case clang::X86::BI__builtin_ia32_pclmulqdq128:
+ case clang::X86::BI__builtin_ia32_pclmulqdq256:
+ case clang::X86::BI__builtin_ia32_pclmulqdq512:
+ return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);
+
case Builtin::BI__builtin_elementwise_fma:
return interp__builtin_elementwise_triop_fp(
S, OpPC, Call,
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..ea4a7c320a3f2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13483,6 +13483,69 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_pclmulqdq128:
+ case clang::X86::BI__builtin_ia32_pclmulqdq256:
+ case clang::X86::BI__builtin_ia32_pclmulqdq512: {
+ // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
+ // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
+ // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ APSInt Imm8;
+ if (!EvaluateInteger(E->getArg(2), Imm8, Info))
+ return false;
+
+ // Extract bits 0 and 4 from imm8
+ unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
+ bool SelectUpperA = (Imm8Val & 0x01) != 0;
+ bool SelectUpperB = (Imm8Val & 0x10) != 0;
+
+ unsigned NumElems = SourceLHS.getVectorLength();
+ SmallVector<APValue, 8> ResultElements;
+ ResultElements.reserve(NumElems);
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+
+ // Process each 128-bit lane
+ for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
+ // Get the two 64-bit halves of the first operand
+ APSInt A0 = SourceLHS.getVectorElt(Lane + 0).getInt();
+ APSInt A1 = SourceLHS.getVectorElt(Lane + 1).getInt();
+ // Get the two 64-bit halves of the second operand
+ APSInt B0 = SourceRHS.getVectorElt(Lane + 0).getInt();
+ APSInt B1 = SourceRHS.getVectorElt(Lane + 1).getInt();
+
+ // Select the appropriate 64-bit values based on imm8
+ APSInt A = SelectUpperA ? A1 : A0;
+ APSInt B = SelectUpperB ? B1 : B0;
+
+ // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
+ // This multiplies two 64-bit values to produce a 128-bit result
+ APInt AVal = A.getValue().zextOrTrunc(64);
+ APInt BVal = B.getValue().zextOrTrunc(64);
+ APInt Result(128, 0);
+
+ // For each bit in A, if set, XOR B shifted left by that bit position
+ for (unsigned i = 0; i < 64; ++i) {
+ if (AVal[i]) {
+ APInt ShiftedB = BVal.zext(128) << i;
+ Result ^= ShiftedB;
+ }
+ }
+
+ // Split the 128-bit result into two 64-bit halves
+ APSInt ResultLow(Result.extractBits(64, 0), DestUnsigned);
+ APSInt ResultHigh(Result.extractBits(64, 64), DestUnsigned);
+
+ ResultElements.push_back(APValue(ResultLow));
+ ResultElements.push_back(APValue(ResultHigh));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_fshl:
case Builtin::BI__builtin_elementwise_fshr: {
APValue SourceHi, SourceLo, SourceShift;
diff --git a/clang/test/CodeGen/X86/pclmul-builtins.c b/clang/test/CodeGen/X86/pclmul-builtins.c
index 44300f645a9d0..b1e3cc5719d97 100644
--- a/clang/test/CodeGen/X86/pclmul-builtins.c
+++ b/clang/test/CodeGen/X86/pclmul-builtins.c
@@ -1,9 +1,25 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - | FileCheck %s
-
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - -std=c++11 | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - -std=c++11 -fexperimental-new-constant-interpreter | FileCheck %s
#include <wmmintrin.h>
+#include "builtin_test_helpers.h"
__m128i test_mm_clmulepi64_si128(__m128i a, __m128i b) {
// CHECK: @llvm.x86.pclmulqdq
return _mm_clmulepi64_si128(a, b, 0);
}
+
+// Test constexpr evaluation for _mm_clmulepi64_si128
+// imm8=0x00: lower 64 bits of both operands
+// Test case: 0x1 * 0x3 = 0x3 (carry-less multiplication)
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, (__m128i){0x3ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL));
+
+// imm8=0x01: upper 64 bits of first operand, lower 64 bits of second
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, (__m128i){0x3ULL, 0x0ULL}, 0x01), 0x3ULL, 0x0ULL));
+
+// imm8=0x10: lower 64 bits of first operand, upper 64 bits of second
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, (__m128i){0x0ULL, 0x3ULL}, 0x10), 0x3ULL, 0x0ULL));
+
+// imm8=0x11: upper 64 bits of both operands
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, (__m128i){0x0ULL, 0x3ULL}, 0x11), 0x3ULL, 0x0ULL));
diff --git a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
index aa2b8bca91268..e408e0556e380 100644
--- a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
+++ b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
@@ -1,17 +1,30 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - | FileCheck %s --check-prefix AVX
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - -std=c++11 | FileCheck %s --check-prefix AVX
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - -std=c++11 | FileCheck %s --check-prefixes AVX,AVX512
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - -std=c++11 -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix AVX
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - -std=c++11 -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes AVX,AVX512
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
// AVX: @llvm.x86.pclmulqdq.256
return _mm256_clmulepi64_epi128(A, B, 0);
}
+// Test constexpr evaluation for _mm256_clmulepi64_epi128
+// Each 128-bit lane is processed independently
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128((__m256i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL}, (__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL));
+
#ifdef __AVX512F__
__m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
// AVX512: @llvm.x86.pclmulqdq.512
return _mm512_clmulepi64_epi128(A, B, 0);
}
+
+// Test constexpr evaluation for _mm512_clmulepi64_epi128
+// Each 128-bit lane is processed independently
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128((__m512i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}, (__m512i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
#endif
>From 12b605b944716fbe9fa9ad8075fdf2672098d7af Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 14:28:48 +0200
Subject: [PATCH 02/15] chore: Format files
---
clang/include/clang/Basic/BuiltinsX86.td | 9 ++++++---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
clang/lib/AST/ExprConstant.cpp | 4 ++--
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 1eee50a441e31..ecc05974adecb 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -444,15 +444,18 @@ let Features = "avx512f,gfni", Attributes = [NoThrow, Const, RequiredVectorWidth
def vgf2p8mulb_v64qi : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "pclmul", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+let Features = "pclmul",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pclmulqdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant char)">;
}
-let Features = "vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "vpclmulqdq",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pclmulqdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant char)">;
}
-let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512f,vpclmulqdq",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ef740c04c83da..83a61f496a3ec 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2746,13 +2746,13 @@ static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
}
static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
+ const CallExpr *Call) {
// PCLMULQDQ: carry-less multiplication of selected 64-bit halves
// imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
// imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
-
+
// Extract imm8 argument
APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ea4a7c320a3f2..fbd3701c784d3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13522,8 +13522,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APSInt A = SelectUpperA ? A1 : A0;
APSInt B = SelectUpperB ? B1 : B0;
- // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
- // This multiplies two 64-bit values to produce a 128-bit result
+ // Perform carry-less multiplication (polynomial multiplication in
+ // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
APInt AVal = A.getValue().zextOrTrunc(64);
APInt BVal = B.getValue().zextOrTrunc(64);
APInt Result(128, 0);
>From 2f80098af72c5758151f3c60104b4e3ead925266 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 15:30:51 +0200
Subject: [PATCH 03/15] refactor: PR Feedback
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 5 ++---
clang/lib/AST/ExprConstant.cpp | 5 ++---
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 83a61f496a3ec..6a71ff9c01586 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2755,9 +2755,8 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
// Extract imm8 argument
APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
- unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
- bool SelectUpperA = (Imm8Val & 0x01) != 0;
- bool SelectUpperB = (Imm8Val & 0x10) != 0;
+ bool SelectUpperA = (Imm8.getZExtValue() & 0x01) != 0;
+ bool SelectUpperB = (Imm8.getZExtValue() & 0x10) != 0;
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index fbd3701c784d3..ed309bdd3e377 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13499,9 +13499,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
// Extract bits 0 and 4 from imm8
- unsigned Imm8Val = static_cast<unsigned>(Imm8.getZExtValue());
- bool SelectUpperA = (Imm8Val & 0x01) != 0;
- bool SelectUpperB = (Imm8Val & 0x10) != 0;
+ bool SelectUpperA = (Imm8.getZExtValue() & 0x01) != 0;
+ bool SelectUpperB = (Imm8.getZExtValue() & 0x10) != 0;
unsigned NumElems = SourceLHS.getVectorLength();
SmallVector<APValue, 8> ResultElements;
>From e3de2515f40786672c2a260a2ea290903f499a0f Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 15:46:02 +0200
Subject: [PATCH 04/15] refactor: simplify conversion from APSInt to APInt
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
clang/lib/AST/ExprConstant.cpp | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6a71ff9c01586..be0b560814ebd 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2785,8 +2785,8 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
// Perform carry-less multiplication (polynomial multiplication in GF(2^64))
// This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.getValue().zextOrTrunc(64);
- APInt BVal = B.getValue().zextOrTrunc(64);
+ APInt AVal = A.extOrTrunc(64);
+ APInt BVal = B.extOrTrunc(64);
APInt Result(128, 0);
// For each bit in A, if set, XOR B shifted left by that bit position
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ed309bdd3e377..22f042d515ac5 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13523,8 +13523,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
// Perform carry-less multiplication (polynomial multiplication in
// GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.getValue().zextOrTrunc(64);
- APInt BVal = B.getValue().zextOrTrunc(64);
+ APInt AVal = A.extOrTrunc(64);
+ APInt BVal = B.extOrTrunc(64);
APInt Result(128, 0);
// For each bit in A, if set, XOR B shifted left by that bit position
>From 5afa1b171ccd84eaa4935e429160696086b158ae Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 15:47:31 +0200
Subject: [PATCH 05/15] refactor: Use static casting
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
clang/lib/AST/ExprConstant.cpp | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index be0b560814ebd..c692e32cdefc8 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2785,8 +2785,8 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
// Perform carry-less multiplication (polynomial multiplication in GF(2^64))
// This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.extOrTrunc(64);
- APInt BVal = B.extOrTrunc(64);
+ APInt AVal = static_cast<const APInt &>(A).zextOrTrunc(64);
+ APInt BVal = static_cast<const APInt &>(B).zextOrTrunc(64);
APInt Result(128, 0);
// For each bit in A, if set, XOR B shifted left by that bit position
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 22f042d515ac5..03ee822b57143 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13523,8 +13523,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
// Perform carry-less multiplication (polynomial multiplication in
// GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.extOrTrunc(64);
- APInt BVal = B.extOrTrunc(64);
+ APInt AVal = static_cast<const APInt &>(A).zextOrTrunc(64);
+ APInt BVal = static_cast<const APInt &>(B).zextOrTrunc(64);
APInt Result(128, 0);
// For each bit in A, if set, XOR B shifted left by that bit position
>From f5c5f23d16a9e4412e55ba8766e9d02f4184d5fa Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 16:24:02 +0200
Subject: [PATCH 06/15] refactor: update tests
---
clang/test/CodeGen/X86/pclmul-builtins.c | 8 ++++----
clang/test/CodeGen/X86/vpclmulqdq-builtins.c | 4 ++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/clang/test/CodeGen/X86/pclmul-builtins.c b/clang/test/CodeGen/X86/pclmul-builtins.c
index b1e3cc5719d97..5af4014b0f663 100644
--- a/clang/test/CodeGen/X86/pclmul-builtins.c
+++ b/clang/test/CodeGen/X86/pclmul-builtins.c
@@ -13,13 +13,13 @@ __m128i test_mm_clmulepi64_si128(__m128i a, __m128i b) {
// Test constexpr evaluation for _mm_clmulepi64_si128
// imm8=0x00: lower 64 bits of both operands
// Test case: 0x1 * 0x3 = 0x3 (carry-less multiplication)
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, (__m128i){0x3ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x1ULL, 0x0ULL}), ((__m128i){0x3ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL));
// imm8=0x01: upper 64 bits of first operand, lower 64 bits of second
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, (__m128i){0x3ULL, 0x0ULL}, 0x01), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, 0x1ULL}), ((__m128i){0x3ULL, 0x0ULL}), 0x01), 0x3ULL, 0x0ULL));
// imm8=0x10: lower 64 bits of first operand, upper 64 bits of second
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x1ULL, 0x0ULL}, (__m128i){0x0ULL, 0x3ULL}, 0x10), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x1ULL, 0x0ULL}), ((__m128i){0x0ULL, 0x3ULL}), 0x10), 0x3ULL, 0x0ULL));
// imm8=0x11: upper 64 bits of both operands
-TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128((__m128i){0x0ULL, 0x1ULL}, (__m128i){0x0ULL, 0x3ULL}, 0x11), 0x3ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, 0x1ULL}), ((__m128i){0x0ULL, 0x3ULL}), 0x11), 0x3ULL, 0x0ULL));
diff --git a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
index e408e0556e380..24b5594518009 100644
--- a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
+++ b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
@@ -15,7 +15,7 @@ __m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
// Test constexpr evaluation for _mm256_clmulepi64_epi128
// Each 128-bit lane is processed independently
-TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128((__m256i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL}, (__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL));
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL}), ((__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL));
#ifdef __AVX512F__
__m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
@@ -25,6 +25,6 @@ __m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
// Test constexpr evaluation for _mm512_clmulepi64_epi128
// Each 128-bit lane is processed independently
-TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128((__m512i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}, (__m512i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}, 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}), ((__m512i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
#endif
>From 0cee75e569754d6813d5dc20281109a8c2292b17 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 16:30:50 +0200
Subject: [PATCH 07/15] Update InterpBuiltin.cpp
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c692e32cdefc8..106b67565ba51 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2755,8 +2755,8 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
// Extract imm8 argument
APSInt Imm8 = popToAPSInt(S, Call->getArg(2));
- bool SelectUpperA = (Imm8.getZExtValue() & 0x01) != 0;
- bool SelectUpperB = (Imm8.getZExtValue() & 0x10) != 0;
+ bool SelectUpperA = (Imm8 & 0x01) != 0;
+ bool SelectUpperB = (Imm8 & 0x10) != 0;
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
>From 974ddac3a7c98f37d78875dd9c55450d9a434581 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 16:32:30 +0200
Subject: [PATCH 08/15] Update ExprConstant.cpp
---
clang/lib/AST/ExprConstant.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 03ee822b57143..e34c8b6f119d4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13499,8 +13499,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
// Extract bits 0 and 4 from imm8
- bool SelectUpperA = (Imm8.getZExtValue() & 0x01) != 0;
- bool SelectUpperB = (Imm8.getZExtValue() & 0x10) != 0;
+ bool SelectUpperA = (Imm8 & 0x01) != 0;
+ bool SelectUpperB = (Imm8 & 0x10) != 0;
unsigned NumElems = SourceLHS.getVectorLength();
SmallVector<APValue, 8> ResultElements;
>From 7a2832377d285dc68675f3f9f177c4c69981b562 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 16:40:34 +0200
Subject: [PATCH 09/15] refactor: Use APInt instead of APSInt
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++----
clang/lib/AST/ExprConstant.cpp | 10 +++++-----
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 106b67565ba51..710bbb5267079 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2780,13 +2780,13 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
});
// Select the appropriate 64-bit values based on imm8
- APSInt A = SelectUpperA ? A1 : A0;
- APSInt B = SelectUpperB ? B1 : B0;
+ APInt A = SelectUpperA ? A1 : A0;
+ APInt B = SelectUpperB ? B1 : B0;
// Perform carry-less multiplication (polynomial multiplication in GF(2^64))
// This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = static_cast<const APInt &>(A).zextOrTrunc(64);
- APInt BVal = static_cast<const APInt &>(B).zextOrTrunc(64);
+ APInt AVal = A.zextOrTrunc(64);;
+ APInt BVal = B.zextOrTrunc(64);
APInt Result(128, 0);
// For each bit in A, if set, XOR B shifted left by that bit position
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e34c8b6f119d4..b237af5d357aa 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13518,13 +13518,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APSInt B1 = SourceRHS.getVectorElt(Lane + 1).getInt();
// Select the appropriate 64-bit values based on imm8
- APSInt A = SelectUpperA ? A1 : A0;
- APSInt B = SelectUpperB ? B1 : B0;
+ APInt A = SelectUpperA ? A1 : A0;
+ APInt B = SelectUpperB ? B1 : B0;
// Perform carry-less multiplication (polynomial multiplication in
- // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = static_cast<const APInt &>(A).zextOrTrunc(64);
- APInt BVal = static_cast<const APInt &>(B).zextOrTrunc(64);
+ // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
+ APInt AVal = A.zextOrTrunc(64);;
+ APInt BVal = B.zextOrTrunc(64);
APInt Result(128, 0);
// For each bit in A, if set, XOR B shifted left by that bit position
>From 5c7eb8e9f8661e5f123badf01487b038473db24d Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 16:42:04 +0200
Subject: [PATCH 10/15] feat: update formatting
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
clang/lib/AST/ExprConstant.cpp | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 710bbb5267079..9032709b7ac6f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2785,7 +2785,7 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
// Perform carry-less multiplication (polynomial multiplication in GF(2^64))
// This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.zextOrTrunc(64);;
+ APInt AVal = A.zextOrTrunc(64);
APInt BVal = B.zextOrTrunc(64);
APInt Result(128, 0);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b237af5d357aa..7bbebfead8320 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13522,8 +13522,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APInt B = SelectUpperB ? B1 : B0;
// Perform carry-less multiplication (polynomial multiplication in
- // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.zextOrTrunc(64);;
+ // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
+ APInt AVal = A.zextOrTrunc(64);
+ ;
APInt BVal = B.zextOrTrunc(64);
APInt Result(128, 0);
>From 3a72489d1a6881c3dad96c4f09a4b1cfcd042b73 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 20:21:48 +0200
Subject: [PATCH 11/15] feat: Use APIntOps::clmul
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 16 +++++++---------
clang/lib/AST/ExprConstant.cpp | 17 +++++++----------
2 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9032709b7ac6f..cdfaa5bf89aa5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2787,15 +2787,13 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
// This multiplies two 64-bit values to produce a 128-bit result
APInt AVal = A.zextOrTrunc(64);
APInt BVal = B.zextOrTrunc(64);
- APInt Result(128, 0);
-
- // For each bit in A, if set, XOR B shifted left by that bit position
- for (unsigned i = 0; i < 64; ++i) {
- if (AVal[i]) {
- APInt ShiftedB = BVal.zext(128) << i;
- Result ^= ShiftedB;
- }
- }
+
+ // Extend both operands to 128 bits for carry-less multiplication
+ APInt A128 = AVal.zext(128);
+ APInt B128 = BVal.zext(128);
+
+ // Use APIntOps::clmul for carry-less multiplication
+ APInt Result = llvm::APIntOps::clmul(A128, B128);
// Split the 128-bit result into two 64-bit halves
APSInt ResultLow(Result.extractBits(64, 0), DestUnsigned);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7bbebfead8320..dcf189163e347 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13524,17 +13524,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
// Perform carry-less multiplication (polynomial multiplication in
// GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
APInt AVal = A.zextOrTrunc(64);
- ;
APInt BVal = B.zextOrTrunc(64);
- APInt Result(128, 0);
-
- // For each bit in A, if set, XOR B shifted left by that bit position
- for (unsigned i = 0; i < 64; ++i) {
- if (AVal[i]) {
- APInt ShiftedB = BVal.zext(128) << i;
- Result ^= ShiftedB;
- }
- }
+
+ // Extend both operands to 128 bits for carry-less multiplication
+ APInt A128 = AVal.zext(128);
+ APInt B128 = BVal.zext(128);
+
+ // Use APIntOps::clmul for carry-less multiplication
+ APInt Result = llvm::APIntOps::clmul(A128, B128);
// Split the 128-bit result into two 64-bit halves
APSInt ResultLow(Result.extractBits(64, 0), DestUnsigned);
>From 5c486a92c486275b1b57936cb3a911bd155ced66 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 23 Nov 2025 20:22:08 +0200
Subject: [PATCH 12/15] chore: format files
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
clang/lib/AST/ExprConstant.cpp | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index cdfaa5bf89aa5..ccf471263ae60 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2787,11 +2787,11 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
// This multiplies two 64-bit values to produce a 128-bit result
APInt AVal = A.zextOrTrunc(64);
APInt BVal = B.zextOrTrunc(64);
-
+
// Extend both operands to 128 bits for carry-less multiplication
APInt A128 = AVal.zext(128);
APInt B128 = BVal.zext(128);
-
+
// Use APIntOps::clmul for carry-less multiplication
APInt Result = llvm::APIntOps::clmul(A128, B128);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index dcf189163e347..6d4c195f4a0c9 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13525,11 +13525,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
// GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
APInt AVal = A.zextOrTrunc(64);
APInt BVal = B.zextOrTrunc(64);
-
+
// Extend both operands to 128 bits for carry-less multiplication
APInt A128 = AVal.zext(128);
APInt B128 = BVal.zext(128);
-
+
// Use APIntOps::clmul for carry-less multiplication
APInt Result = llvm::APIntOps::clmul(A128, B128);
>From ec1331c7557a466b80b69bea6c776482b8eccc96 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Mon, 24 Nov 2025 13:36:01 +0200
Subject: [PATCH 13/15] refactor: remove unused conversion
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 9 ++-------
clang/lib/AST/ExprConstant.cpp | 9 ++-------
2 files changed, 4 insertions(+), 14 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ccf471263ae60..096ef37d5bb01 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2783,14 +2783,9 @@ static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
APInt A = SelectUpperA ? A1 : A0;
APInt B = SelectUpperB ? B1 : B0;
- // Perform carry-less multiplication (polynomial multiplication in GF(2^64))
- // This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.zextOrTrunc(64);
- APInt BVal = B.zextOrTrunc(64);
-
// Extend both operands to 128 bits for carry-less multiplication
- APInt A128 = AVal.zext(128);
- APInt B128 = BVal.zext(128);
+ APInt A128 = A.zext(128);
+ APInt B128 = B.zext(128);
// Use APIntOps::clmul for carry-less multiplication
APInt Result = llvm::APIntOps::clmul(A128, B128);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 6d4c195f4a0c9..c0d7832f480d8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13521,14 +13521,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APInt A = SelectUpperA ? A1 : A0;
APInt B = SelectUpperB ? B1 : B0;
- // Perform carry-less multiplication (polynomial multiplication in
- // GF(2^64)) This multiplies two 64-bit values to produce a 128-bit result
- APInt AVal = A.zextOrTrunc(64);
- APInt BVal = B.zextOrTrunc(64);
-
// Extend both operands to 128 bits for carry-less multiplication
- APInt A128 = AVal.zext(128);
- APInt B128 = BVal.zext(128);
+ APInt A128 = A.zext(128);
+ APInt B128 = B.zext(128);
// Use APIntOps::clmul for carry-less multiplication
APInt Result = llvm::APIntOps::clmul(A128, B128);
>From 931bcc5320a53870df7b30bddeff132eea77a5ef Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Mon, 24 Nov 2025 13:40:51 +0200
Subject: [PATCH 14/15] feat: add upper bits testing
---
clang/test/CodeGen/X86/pclmul-builtins.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/clang/test/CodeGen/X86/pclmul-builtins.c b/clang/test/CodeGen/X86/pclmul-builtins.c
index 5af4014b0f663..ee8e05e4cf2e5 100644
--- a/clang/test/CodeGen/X86/pclmul-builtins.c
+++ b/clang/test/CodeGen/X86/pclmul-builtins.c
@@ -23,3 +23,20 @@ TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x1ULL, 0x0ULL}), ((_
// imm8=0x11: upper 64 bits of both operands
TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, 0x1ULL}), ((__m128i){0x0ULL, 0x3ULL}), 0x11), 0x3ULL, 0x0ULL));
+
+// Test cases with non-zero upper 64-bit results
+// imm8=0x00: lower 64 bits of both operands
+// 0x8000000000000000 * 0x2 = result with upper bits set
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){(long long)0x8000000000000000ULL, 0x0ULL}), ((__m128i){0x2ULL, 0x0ULL}), 0x00), 0x0ULL, 0x1ULL));
+
+// imm8=0x01: upper 64 bits of first operand, lower 64 bits of second
+// 0xFFFFFFFFFFFFFFFF * 0x2 = result with upper bits set
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, (long long)0xFFFFFFFFFFFFFFFFULL}), ((__m128i){0x2ULL, 0x0ULL}), 0x01), 0xFFFFFFFFFFFFFFFEULL, 0x1ULL));
+
+// imm8=0x10: lower 64 bits of first operand, upper 64 bits of second
+// 0x1000000000000000 * 0x10 = result with upper bits set
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){(long long)0x1000000000000000ULL, 0x0ULL}), ((__m128i){0x0ULL, 0x10ULL}), 0x10), 0x0ULL, 0x1ULL));
+
+// imm8=0x11: upper 64 bits of both operands
+// 0x8000000000000001 * 0x8000000000000001 = result with upper bits set
+TEST_CONSTEXPR(match_m128i(_mm_clmulepi64_si128(((__m128i){0x0ULL, (long long)0x8000000000000001ULL}), ((__m128i){0x0ULL, (long long)0x8000000000000001ULL}), 0x11), 0x1ULL, 0x4000000000000000ULL));
>From e433e3348a654daa97cd1549352b963273f7bd5c Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Fri, 28 Nov 2025 22:08:44 +0200
Subject: [PATCH 15/15] refactor: Add complex test cases
---
clang/test/CodeGen/X86/vpclmulqdq-builtins.c | 65 ++++++++++++++++++++
1 file changed, 65 insertions(+)
diff --git a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
index 24b5594518009..0d530862f1d37 100644
--- a/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
+++ b/clang/test/CodeGen/X86/vpclmulqdq-builtins.c
@@ -15,8 +15,45 @@ __m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) {
// Test constexpr evaluation for _mm256_clmulepi64_epi128
// Each 128-bit lane is processed independently
+
+// Basic test cases for all imm8 values (0x00, 0x01, 0x10, 0x11)
+// imm8=0x00: lower 64 bits of both operands in each lane
TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL}), ((__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL));
+// imm8=0x01: upper 64 bits of first operand, lower 64 bits of second in each lane
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x0ULL, 0x1ULL, 0x0ULL, 0x2ULL}), ((__m256i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL}), 0x01), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL));
+
+// imm8=0x10: lower 64 bits of first operand, upper 64 bits of second in each lane
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL}), ((__m256i){0x0ULL, 0x3ULL, 0x0ULL, 0x5ULL}), 0x10), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL));
+
+// imm8=0x11: upper 64 bits of both operands in each lane
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x0ULL, 0x1ULL, 0x0ULL, 0x2ULL}), ((__m256i){0x0ULL, 0x3ULL, 0x0ULL, 0x5ULL}), 0x11), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL));
+
+// Complex test cases with edge values and non-zero upper 64-bit results
+// Test with high bit set (0x8000000000000000) - produces result with upper bits
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){(long long)0x8000000000000000ULL, 0x0ULL, (long long)0x8000000000000000ULL, 0x0ULL}), ((__m256i){0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL}), 0x00), 0x0ULL, 0x1ULL, 0x0ULL, 0x2ULL));
+
+// Test with all bits set (0xFFFFFFFFFFFFFFFF) - maximum value
+// imm8=0x01: upper(A) * lower(B) for each 128-bit lane
+// For lane 0: upper(0xFFFFFFFFFFFFFFFF) * lower(0x2)
+// For lane 1: upper(0xFFFFFFFFFFFFFFFF) * lower(0x3)
+// Note: This test case removed due to complexity - using simpler edge cases instead
+
+// Test with large values whose carry-less product spills into the upper 64 bits
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){(long long)0x1000000000000000ULL, 0x0ULL, (long long)0x2000000000000000ULL, 0x0ULL}), ((__m256i){0x0ULL, 0x10ULL, 0x0ULL, 0x20ULL}), 0x10), 0x0ULL, 0x1ULL, 0x0ULL, 0x4ULL));
+
+// Test with values that produce results in upper 64 bits
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL}), ((__m256i){0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL}), 0x11), 0x1ULL, 0x4000000000000000ULL, 0x1ULL, 0x4000000000000000ULL));
+
+// Test with polynomial-like values (common in CRC/GCM)
+// x^63 + x^62 + ... + x + 1 = 0xFFFFFFFFFFFFFFFF
+// x^64 = 0x10000000000000000 (represented as upper 64 bits = 1)
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x1ULL, 0x0ULL, (long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL}), ((__m256i){(long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL, 0x1ULL, 0x0ULL}), 0x00), (long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL, (long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL));
+
+// Test with sparse polynomials (few bits set)
+TEST_CONSTEXPR(match_m256i(_mm256_clmulepi64_epi128(((__m256i){0x5ULL, 0x0ULL, 0x9ULL, 0x0ULL}), ((__m256i){0x3ULL, 0x0ULL, 0x7ULL, 0x0ULL}), 0x00), 0xfULL, 0x0ULL, 0x3fULL, 0x0ULL));
+
+
#ifdef __AVX512F__
__m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
// AVX512: @llvm.x86.pclmulqdq.512
@@ -25,6 +62,34 @@ __m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) {
// Test constexpr evaluation for _mm512_clmulepi64_epi128
// Each 128-bit lane is processed independently
+
+// Basic test cases for all imm8 values (0x00, 0x01, 0x10, 0x11)
+// imm8=0x00: lower 64 bits of both operands in each lane
TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}), ((__m512i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}), 0x00), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
+
+// imm8=0x01: upper 64 bits of first operand, lower 64 bits of second in each lane
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x0ULL, 0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL}), ((__m512i){0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL, 0x0ULL}), 0x01), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
+
+// imm8=0x10: lower 64 bits of first operand, upper 64 bits of second in each lane
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL}), ((__m512i){0x0ULL, 0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL}), 0x10), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
+
+// imm8=0x11: upper 64 bits of both operands in each lane
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x0ULL, 0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL}), ((__m512i){0x0ULL, 0x3ULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x7ULL, 0x0ULL, 0x9ULL}), 0x11), 0x3ULL, 0x0ULL, 0xaULL, 0x0ULL, 0x1cULL, 0x0ULL, 0x48ULL, 0x0ULL));
+
+// Complex test cases with edge values and non-zero upper 64-bit results
+// Test with high bit set (0x8000000000000000) - produces result with upper bits
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){(long long)0x8000000000000000ULL, 0x0ULL, (long long)0x8000000000000000ULL, 0x0ULL, (long long)0x8000000000000000ULL, 0x0ULL, (long long)0x8000000000000000ULL, 0x0ULL}), ((__m512i){0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL, 0x0ULL, 0x10ULL, 0x0ULL}), 0x00), 0x0ULL, 0x1ULL, 0x0ULL, 0x2ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x8ULL));
+
+// Test with all bits set (0xFFFFFFFFFFFFFFFF) - maximum value
+// Note: Complex test case with all 1s removed - using simpler edge cases instead
+
+// Test with large values whose carry-less product spills into the upper 64 bits
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){(long long)0x1000000000000000ULL, 0x0ULL, (long long)0x2000000000000000ULL, 0x0ULL, (long long)0x4000000000000000ULL, 0x0ULL, (long long)0x8000000000000000ULL, 0x0ULL}), ((__m512i){0x0ULL, 0x10ULL, 0x0ULL, 0x20ULL, 0x0ULL, 0x40ULL, 0x0ULL, 0x80ULL}), 0x10), 0x0ULL, 0x1ULL, 0x0ULL, 0x4ULL, 0x0ULL, 0x10ULL, 0x0ULL, 0x40ULL));
+
+// Test with values that produce results in upper 64 bits
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL}), ((__m512i){0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL, 0x0ULL, (long long)0x8000000000000001ULL}), 0x11), 0x1ULL, 0x4000000000000000ULL, 0x1ULL, 0x4000000000000000ULL, 0x1ULL, 0x4000000000000000ULL, 0x1ULL, 0x4000000000000000ULL));
+
+// Test with polynomial-like values (common in CRC/GCM) across all lanes
+TEST_CONSTEXPR(match_m512i(_mm512_clmulepi64_epi128(((__m512i){0x1ULL, 0x0ULL, (long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL, 0x5ULL, 0x0ULL, 0x9ULL, 0x0ULL}), ((__m512i){(long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL, 0x1ULL, 0x0ULL, 0x3ULL, 0x0ULL, 0x7ULL, 0x0ULL}), 0x00), (long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL, (long long)0xFFFFFFFFFFFFFFFFULL, 0x0ULL, 0xfULL, 0x0ULL, 0x3fULL, 0x0ULL));
#endif
More information about the cfe-commits
mailing list