[clang] [clang] Introduce elementwise clz/ctz builtins (PR #131995)
Fraser Cormack via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 16 08:47:05 PDT 2025
https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/131995
>From 93112f0d5f0a8f6c8b1803a61d549701ba476f20 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 3 Feb 2025 16:54:17 +0000
Subject: [PATCH 1/6] [clang] Introduce elementwise clz/ctz builtins
These builtins are modeled on the clzg/ctzg builtins, which accept an
optional second argument. This second argument is returned if the first
argument is 0.
---
clang/docs/LanguageExtensions.rst | 8 ++
clang/include/clang/Basic/Builtins.td | 12 +++
clang/lib/CodeGen/CGBuiltin.cpp | 18 ++--
clang/lib/Sema/SemaChecking.cpp | 13 +++
.../test/CodeGen/builtins-elementwise-math.c | 96 +++++++++++++++++++
clang/test/Sema/builtins-elementwise-math.c | 44 +++++++++
6 files changed, 185 insertions(+), 6 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index f448a9a8db172..c938fc64c129b 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -848,6 +848,14 @@ of different sizes and signs is forbidden in binary and ternary builtins.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
for the comparison.
+ T __builtin_elementwise_clz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types
+ the first argument is 0 and an optional second argument is provided,
+ the second argument is returned. If the first argument is 0 but only
+ one argument is provided, the behaviour is undefined.
+ T __builtin_elementwise_ctz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types
+ the first argument is 0 and an optional second argument is provided,
+ the second argument is returned. If the first argument is 0 but only
+ one argument is provided, the behaviour is undefined.
============================================== ====================================================================== =========================================
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 5ebb82180521d..e0f78ea2ed2aa 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1502,6 +1502,18 @@ def ElementwiseSubSat : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseClz : Builtin {
+ let Spellings = ["__builtin_elementwise_clz"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseCtz : Builtin {
+ let Spellings = ["__builtin_elementwise_ctz"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
+ let Prototype = "void(...)";
+}
+
def ReduceMax : Builtin {
let Spellings = ["__builtin_reduce_max"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5f2eb76e7bacb..4c341cb2a42cc 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3322,9 +3322,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_ctz:
case Builtin::BI__builtin_ctzl:
case Builtin::BI__builtin_ctzll:
- case Builtin::BI__builtin_ctzg: {
- bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
- E->getNumArgs() > 1;
+ case Builtin::BI__builtin_ctzg:
+ case Builtin::BI__builtin_elementwise_ctz: {
+ bool HasFallback =
+ (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg ||
+ BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctz) &&
+ E->getNumArgs() > 1;
Value *ArgValue =
HasFallback ? EmitScalarExpr(E->getArg(0))
@@ -3354,9 +3357,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_clz:
case Builtin::BI__builtin_clzl:
case Builtin::BI__builtin_clzll:
- case Builtin::BI__builtin_clzg: {
- bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
- E->getNumArgs() > 1;
+ case Builtin::BI__builtin_clzg:
+ case Builtin::BI__builtin_elementwise_clz: {
+ bool HasFallback =
+ (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg ||
+ BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_clz) &&
+ E->getNumArgs() > 1;
Value *ArgValue =
HasFallback ? EmitScalarExpr(E->getArg(0))
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index dd5b710d7e1d4..e60ae9ebae4d4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3070,6 +3070,19 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
TheCall->setType(Magnitude.get()->getType());
break;
}
+ case Builtin::BI__builtin_elementwise_clz:
+ case Builtin::BI__builtin_elementwise_ctz:
+ // These builtins can be unary or binary. Note for empty calls we call the
+ // unary checker in order to not emit an error that says the function
+ // expects 2 arguments, which would be misleading.
+ if (TheCall->getNumArgs() <= 1) {
+ if (PrepareBuiltinElementwiseMathOneArgCall(
+ TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
+ return ExprError();
+ } else if (BuiltinElementwiseMath(
+ TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
+ return ExprError();
+ break;
case Builtin::BI__builtin_reduce_max:
case Builtin::BI__builtin_reduce_min: {
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index ee8345ff51e5e..537e38bb0bd28 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -1176,3 +1176,99 @@ void test_builtin_elementwise_fma(float f32, double f64,
half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);
}
+
+void test_builtin_elementwise_clz(si8 vs1, si8 vs2, u4 vu1,
+ long long int lli, short si,
+ _BitInt(31) bi, int i,
+ char ci) {
+ // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+ // CHECK-NEXT: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
+ vs1 = __builtin_elementwise_clz(vs1);
+
+ // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+ // CHECK-NEXT: [[CLZ:%.+]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
+ // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
+ // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
+ // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[CLZ]]
+ vs1 = __builtin_elementwise_clz(vs1, vs2);
+
+ // CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[V4U1]], i1 true)
+ vu1 = __builtin_elementwise_clz(vu1);
+
+ // CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr
+ // CHECK-NEXT: call i64 @llvm.ctlz.i64(i64 [[LLI]], i1 true)
+ lli = __builtin_elementwise_clz(lli);
+
+ // CHECK: [[SI:%.+]] = load i16, ptr %si.addr
+ // CHECK-NEXT: call i16 @llvm.ctlz.i16(i16 [[SI]], i1 true)
+ si = __builtin_elementwise_clz(si);
+
+ // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
+ // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+ // CHECK-NEXT: call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
+ bi = __builtin_elementwise_clz(bi);
+
+ // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
+ // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+ // CHECK-NEXT: [[CLZ:%.+]] = call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
+ // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
+ // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[CLZ]]
+ bi = __builtin_elementwise_clz(bi, (_BitInt(31))1);
+
+ // CHECK: [[I:%.+]] = load i32, ptr %i.addr
+ // CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 [[I]], i1 true)
+ i = __builtin_elementwise_clz(i);
+
+ // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr
+ // CHECK-NEXT: call i8 @llvm.ctlz.i8(i8 [[CI]], i1 true)
+ ci = __builtin_elementwise_clz(ci);
+}
+
+void test_builtin_elementwise_ctz(si8 vs1, si8 vs2, u4 vu1,
+ long long int lli, short si,
+ _BitInt(31) bi, int i,
+ char ci) {
+ // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+ // CHECK-NEXT: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
+ vs1 = __builtin_elementwise_ctz(vs1);
+
+ // CHECK: [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
+ // CHECK-NEXT: [[ctz:%.+]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
+ // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
+ // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
+ // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[ctz]]
+ vs1 = __builtin_elementwise_ctz(vs1, vs2);
+
+ // CHECK: [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[V4U1]], i1 true)
+ vu1 = __builtin_elementwise_ctz(vu1);
+
+ // CHECK: [[LLI:%.+]] = load i64, ptr %lli.addr
+ // CHECK-NEXT: call i64 @llvm.cttz.i64(i64 [[LLI]], i1 true)
+ lli = __builtin_elementwise_ctz(lli);
+
+ // CHECK: [[SI:%.+]] = load i16, ptr %si.addr
+ // CHECK-NEXT: call i16 @llvm.cttz.i16(i16 [[SI]], i1 true)
+ si = __builtin_elementwise_ctz(si);
+
+ // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
+ // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+ // CHECK-NEXT: call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
+ bi = __builtin_elementwise_ctz(bi);
+
+ // CHECK: [[BI1:%.+]] = load i32, ptr %bi.addr
+ // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
+ // CHECK-NEXT: [[ctz:%.+]] = call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
+ // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
+ // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[ctz]]
+ bi = __builtin_elementwise_ctz(bi, (_BitInt(31))1);
+
+ // CHECK: [[I:%.+]] = load i32, ptr %i.addr
+ // CHECK-NEXT: call i32 @llvm.cttz.i32(i32 [[I]], i1 true)
+ i = __builtin_elementwise_ctz(i);
+
+ // CHECK: [[CI:%.+]] = load i8, ptr %ci.addr
+ // CHECK-NEXT: call i8 @llvm.cttz.i8(i8 [[CI]], i1 true)
+ ci = __builtin_elementwise_ctz(ci);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 01057b3f8d083..94b987c09b9e5 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -1213,3 +1213,47 @@ float3 foo(float3 a,const struct_float3* hi) {
float3 b = __builtin_elementwise_max((float3)(0.0f), a);
return __builtin_elementwise_pow(b, hi->b.yyy);
}
+
+void test_builtin_elementwise_clz(int i32, int2 v2i32, short i16,
+ double f64, double2 v2f64) {
+ f64 = __builtin_elementwise_clz(f64);
+ // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}
+
+ _Complex float c1;
+ c1 = __builtin_elementwise_clz(c1);
+ // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}
+
+ v2i32 = __builtin_elementwise_clz(v2i32, i32);
+ // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}
+
+ v2i32 = __builtin_elementwise_clz(v2i32, f64);
+ // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}
+
+ v2i32 = __builtin_elementwise_clz();
+ // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+ v2i32 = __builtin_elementwise_clz(v2i32, v2i32, f64);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+void test_builtin_elementwise_ctz(int i32, int2 v2i32, short i16,
+ double f64, double2 v2f64) {
+ f64 = __builtin_elementwise_ctz(f64);
+ // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}
+
+ _Complex float c1;
+ c1 = __builtin_elementwise_ctz(c1);
+ // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}
+
+ v2i32 = __builtin_elementwise_ctz(v2i32, i32);
+ // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}
+
+ v2i32 = __builtin_elementwise_ctz(v2i32, f64);
+ // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}
+
+ v2i32 = __builtin_elementwise_ctz();
+ // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+ v2i32 = __builtin_elementwise_ctz(v2i32, v2i32, f64);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
>From 898d62d9c81f678cf0be4a47e9c90d9d301d3c3d Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Wed, 19 Mar 2025 13:53:50 +0000
Subject: [PATCH 2/6] support constexpr
---
clang/lib/AST/ExprConstant.cpp | 55 ++++++++++++++++++--
clang/test/Sema/constant-builtins-vector.cpp | 34 ++++++++++++
2 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1b33b6706e204..645905528cb79 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11551,6 +11551,49 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case Builtin::BI__builtin_elementwise_clz:
+ case Builtin::BI__builtin_elementwise_ctz: {
+ APValue SourceLHS;
+ std::optional<APValue> Fallback;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS))
+ return false;
+ if (E->getNumArgs() > 1) {
+ APValue FallbackTmp;
+ if (!EvaluateAsRValue(Info, E->getArg(1), FallbackTmp))
+ return false;
+ Fallback = FallbackTmp;
+ }
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ if (!LHS) {
+ // Without a fallback, a zero element is undefined
+ if (!Fallback)
+ return false;
+ ResultElements.push_back(Fallback->getVectorElt(EltNum));
+ continue;
+ }
+ switch (E->getBuiltinCallee()) {
+ case Builtin::BI__builtin_elementwise_clz:
+ ResultElements.push_back(APValue(
+ APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countl_zero()),
+ DestEltTy->isUnsignedIntegerOrEnumerationType())));
+ break;
+ case Builtin::BI__builtin_elementwise_ctz:
+ ResultElements.push_back(APValue(
+ APSInt(APInt(Info.Ctx.getIntWidth(DestEltTy), LHS.countr_zero()),
+ DestEltTy->isUnsignedIntegerOrEnumerationType())));
+ break;
+ }
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
}
}
@@ -13103,6 +13146,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_clzll:
case Builtin::BI__builtin_clzs:
case Builtin::BI__builtin_clzg:
+ case Builtin::BI__builtin_elementwise_clz:
case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
case Builtin::BI__lzcnt:
case Builtin::BI__lzcnt64: {
@@ -13111,7 +13155,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return false;
std::optional<APSInt> Fallback;
- if (BuiltinOp == Builtin::BI__builtin_clzg && E->getNumArgs() > 1) {
+ if ((BuiltinOp == Builtin::BI__builtin_clzg ||
+ BuiltinOp == Builtin::BI__builtin_elementwise_clz) &&
+ E->getNumArgs() > 1) {
APSInt FallbackTemp;
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
return false;
@@ -13183,13 +13229,16 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_ctzl:
case Builtin::BI__builtin_ctzll:
case Builtin::BI__builtin_ctzs:
- case Builtin::BI__builtin_ctzg: {
+ case Builtin::BI__builtin_ctzg:
+ case Builtin::BI__builtin_elementwise_ctz: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;
std::optional<APSInt> Fallback;
- if (BuiltinOp == Builtin::BI__builtin_ctzg && E->getNumArgs() > 1) {
+ if ((BuiltinOp == Builtin::BI__builtin_ctzg ||
+ BuiltinOp == Builtin::BI__builtin_elementwise_ctz) &&
+ E->getNumArgs() > 1) {
APSInt FallbackTemp;
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
return false;
diff --git a/clang/test/Sema/constant-builtins-vector.cpp b/clang/test/Sema/constant-builtins-vector.cpp
index bde5c478b2b6f..3d988cc655099 100644
--- a/clang/test/Sema/constant-builtins-vector.cpp
+++ b/clang/test/Sema/constant-builtins-vector.cpp
@@ -860,3 +860,37 @@ static_assert(__builtin_elementwise_sub_sat(0U, 1U) == 0U);
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_sub_sat((vector4char){5, 4, 3, 2}, (vector4char){1, 1, 1, 1})) == (LITTLE_END ? 0x01020304 : 0x04030201));
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_sub_sat((vector4uchar){5, 4, 3, 2}, (vector4uchar){1, 1, 1, 1})) == (LITTLE_END ? 0x01020304U : 0x04030201U));
static_assert(__builtin_bit_cast(unsigned long long, __builtin_elementwise_sub_sat((vector4short){(short)0x8000, (short)0x8001, (short)0x8002, (short)0x8003}, (vector4short){7, 8, 9, 10}) == (LITTLE_END ? 0x8000800080008000 : 0x8000800080008000)));
+
+static_assert(__builtin_elementwise_clz(2) == 30);
+static_assert(__builtin_elementwise_clz(2, 8) == 30);
+static_assert(__builtin_elementwise_clz(0, 8) == 8);
+static_assert(__builtin_elementwise_clz((char)2) == 6);
+static_assert(__builtin_elementwise_clz((short)2) == 14);
+static_assert(__builtin_elementwise_clz((char)1) == 0x7);
+static_assert(__builtin_elementwise_clz((char)4) == 0x5);
+static_assert(__builtin_elementwise_clz((char)127) == 0x1);
+static_assert(__builtin_elementwise_clz((char)128) == 0x0);
+static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 4, 127, (char)128})) == (LITTLE_END ? 0x00010507 : 0x07050100));
+
+constexpr int clz0 = __builtin_elementwise_clz(0);
+// expected-error@-1 {{must be initialized by a constant expression}}
+constexpr vector4char clz1 = __builtin_elementwise_clz((vector4char){1, 0, 3, 4});
+// expected-error@-1 {{must be initialized by a constant expression}}
+static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE01FF07 : 0x07FF01FE));
+
+static_assert(__builtin_elementwise_ctz(2) == 1);
+static_assert(__builtin_elementwise_ctz(2, 8) == 1);
+static_assert(__builtin_elementwise_ctz(0, 8) == 8);
+static_assert(__builtin_elementwise_ctz((char)2) == 1);
+static_assert(__builtin_elementwise_ctz((short)2) == 1);
+static_assert(__builtin_elementwise_ctz((char)8) == 0x3);
+static_assert(__builtin_elementwise_ctz((char)32) == 0x5);
+static_assert(__builtin_elementwise_ctz((char)127) == 0x0);
+static_assert(__builtin_elementwise_ctz((char)128) == 0x7);
+static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 32, 127, (char)128})) == (LITTLE_END ? 0x07000503 : 0x03050007));
+
+constexpr int ctz0 = __builtin_elementwise_ctz(0);
+// expected-error@-1 {{must be initialized by a constant expression}}
+constexpr vector4char ctz1 = __builtin_elementwise_ctz((vector4char){1, 0, 3, 4});
+// expected-error@-1 {{must be initialized by a constant expression}}
+static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE00FF03 : 0x03FF00FE));
>From 7369848f5922c7ecdb9d30ed4d6c670493bd7670 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Wed, 19 Mar 2025 14:01:58 +0000
Subject: [PATCH 3/6] update docs
---
clang/docs/LanguageExtensions.rst | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index c938fc64c129b..fe0567ee73c78 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -756,7 +756,8 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
-``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context.
+``__builtin_elementwise_sub_sat``, ``__builtin_elementwise_clz``,
+``__builtin_elementwise_ctz`` can be called in a ``constexpr`` context.
No implicit promotion of integer types takes place. The mixing of integer types
of different sizes and signs is forbidden in binary and ternary builtins.
>From 055b73e863a079647095b37949c8c71826530522 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Tue, 15 Apr 2025 10:35:54 +0100
Subject: [PATCH 4/6] docs: result is undefined; not behaviour
---
clang/docs/LanguageExtensions.rst | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index fe0567ee73c78..96adf955e4a5f 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -852,11 +852,11 @@ of different sizes and signs is forbidden in binary and ternary builtins.
T __builtin_elementwise_clz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types
the first argument is 0 and an optional second argument is provided,
the second argument is returned. If the first argument is 0 but only
- one argument is provided, the behaviour is undefined.
+ one argument is provided, the result is undefined.
T __builtin_elementwise_ctz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types
the first argument is 0 and an optional second argument is provided,
the second argument is returned. If the first argument is 0 but only
- one argument is provided, the behaviour is undefined.
+ one argument is provided, the result is undefined.
============================================== ====================================================================== =========================================
>From 2218d319990eaa04ea68080c9e005c079834bc81 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Wed, 16 Jul 2025 12:11:24 +0100
Subject: [PATCH 5/6] add support for new constexpr interpreter
---
.../include/clang/Basic/DiagnosticASTKinds.td | 3 +
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 92 +++++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 15 ++-
clang/test/Sema/constant-builtins-vector.cpp | 12 ++-
4 files changed, 117 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index a67b9995d3b54..39b30e4ea456e 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -396,6 +396,9 @@ def note_constexpr_non_const_vectorelements : Note<
"cannot determine number of elements for sizeless vectors in a constant expression">;
def note_constexpr_assumption_failed : Note<
"assumption evaluated to false">;
+def note_constexpr_countzeroes_zero : Note<
+ "evaluation of %select{__builtin_elementwise_clz|__builtin_elementwise_ctz}0 "
+ "with a zero value is undefined">;
def err_experimental_clang_interp_failed : Error<
"the experimental clang interpreter failed to evaluate an expression">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index de0b97fd93c76..06c53f17a7065 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1737,6 +1737,93 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC,
return true;
}
+/// Takes an integer or vector, plus an optional same-typed fallback for zero.
+static bool interp__builtin_elementwise_countzeroes(InterpState &S,
+ CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call,
+ unsigned BuiltinID) {
+ const bool HasZeroArg = Call->getNumArgs() == 2;
+ const bool IsCTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctz;
+ assert(Call->getNumArgs() == 1 || HasZeroArg);
+ if (Call->getArg(0)->getType()->isIntegerType()) {
+ PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
+ APSInt Val = popToAPSInt(S.Stk, ArgT);
+ std::optional<APSInt> ZeroVal;
+ if (HasZeroArg) {
+ ZeroVal = Val;
+ Val = popToAPSInt(S.Stk, ArgT);
+ }
+
+ if (Val.isZero()) {
+ if (ZeroVal) {
+ pushInteger(S, *ZeroVal, Call->getType());
+ return true;
+ }
+ // If we haven't been provided the second argument, the result is
+ // undefined
+ S.FFDiag(S.Current->getSource(OpPC),
+ diag::note_constexpr_countzeroes_zero)
+ << /*IsTrailing=*/IsCTZ;
+ return false;
+ }
+
+ if (BuiltinID == Builtin::BI__builtin_elementwise_clz) {
+ pushInteger(S, Val.countLeadingZeros(), Call->getType());
+ } else {
+ pushInteger(S, Val.countTrailingZeros(), Call->getType());
+ }
+ return true;
+ }
+ // Otherwise, the argument must be a vector.
+ const ASTContext &ASTCtx = S.getASTContext();
+ Pointer ZeroArg;
+ if (HasZeroArg) {
+ assert(Call->getArg(1)->getType()->isVectorType() &&
+ ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(),
+ Call->getArg(1)->getType()));
+ ZeroArg = S.Stk.pop<Pointer>();
+ assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
+ }
+ assert(Call->getArg(0)->getType()->isVectorType());
+ const Pointer &Arg = S.Stk.pop<Pointer>();
+ assert(Arg.getFieldDesc()->isPrimitiveArray());
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ assert(Dst.getFieldDesc()->isPrimitiveArray());
+ assert(Arg.getFieldDesc()->getNumElems() ==
+ Dst.getFieldDesc()->getNumElems());
+
+ QualType ElemType = Arg.getFieldDesc()->getElemQualType();
+ PrimType ElemT = *S.getContext().classify(ElemType);
+ unsigned NumElems = Arg.getNumElems();
+
+ // FIXME: Reading from uninitialized vector elements?
+ for (unsigned I = 0; I != NumElems; ++I) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
+ if (EltVal.isZero()) {
+ if (HasZeroArg) {
+ Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
+ } else {
+ // If we haven't been provided the second argument, the result is
+ // undefined
+ S.FFDiag(S.Current->getSource(OpPC),
+ diag::note_constexpr_countzeroes_zero)
+ << /*IsTrailing=*/IsCTZ;
+ return false;
+ }
+ } else if (IsCTZ) {
+ Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
+ } else {
+ Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
+ }
+ Dst.atIndex(I).initialize();
+ });
+ }
+
+ return true;
+}
+
static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call, unsigned ID) {
@@ -2600,6 +2687,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_ctzg:
return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
+ case Builtin::BI__builtin_elementwise_clz:
+ case Builtin::BI__builtin_elementwise_ctz:
+ return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
+ BuiltinID);
+
case Builtin::BI__builtin_bswap16:
case Builtin::BI__builtin_bswap32:
case Builtin::BI__builtin_bswap64:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 645905528cb79..a503752860e66 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11573,8 +11573,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
if (!LHS) {
// Without a fallback, a zero element is undefined
- if (!Fallback)
+ if (!Fallback) {
+ Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
+ << /*IsTrailing=*/(E->getBuiltinCallee() ==
+ Builtin::BI__builtin_elementwise_ctz);
return false;
+ }
ResultElements.push_back(Fallback->getVectorElt(EltNum));
continue;
}
@@ -13175,6 +13179,11 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
BuiltinOp != Builtin::BI__lzcnt &&
BuiltinOp != Builtin::BI__lzcnt64;
+ if (BuiltinOp == Builtin::BI__builtin_elementwise_clz) {
+ Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
+ << /*IsTrailing=*/false;
+ }
+
if (ZeroIsUndefined)
return Error(E);
}
@@ -13249,6 +13258,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
if (Fallback)
return Success(*Fallback, E);
+ if (BuiltinOp == Builtin::BI__builtin_elementwise_ctz) {
+ Info.FFDiag(E, diag::note_constexpr_countzeroes_zero)
+ << /*IsTrailing=*/true;
+ }
return Error(E);
}
diff --git a/clang/test/Sema/constant-builtins-vector.cpp b/clang/test/Sema/constant-builtins-vector.cpp
index 3d988cc655099..50b6959cefa4a 100644
--- a/clang/test/Sema/constant-builtins-vector.cpp
+++ b/clang/test/Sema/constant-builtins-vector.cpp
@@ -873,9 +873,11 @@ static_assert(__builtin_elementwise_clz((char)128) == 0x0);
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 4, 127, (char)128})) == (LITTLE_END ? 0x00010507 : 0x07050100));
constexpr int clz0 = __builtin_elementwise_clz(0);
-// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-error@-1 {{must be initialized by a constant expression}} \
+// expected-note@-1 {{evaluation of __builtin_elementwise_clz with a zero value is undefined}}
constexpr vector4char clz1 = __builtin_elementwise_clz((vector4char){1, 0, 3, 4});
-// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-error@-1 {{must be initialized by a constant expression}} \
+// expected-note@-1 {{evaluation of __builtin_elementwise_clz with a zero value is undefined}}
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE01FF07 : 0x07FF01FE));
static_assert(__builtin_elementwise_ctz(2) == 1);
@@ -890,7 +892,9 @@ static_assert(__builtin_elementwise_ctz((char)128) == 0x7);
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 32, 127, (char)128})) == (LITTLE_END ? 0x07000503 : 0x03050007));
constexpr int ctz0 = __builtin_elementwise_ctz(0);
-// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-error@-1 {{must be initialized by a constant expression}} \
+// expected-note@-1 {{evaluation of __builtin_elementwise_ctz with a zero value is undefined}}
constexpr vector4char ctz1 = __builtin_elementwise_ctz((vector4char){1, 0, 3, 4});
-// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-error@-1 {{must be initialized by a constant expression}} \
+// expected-note@-1 {{evaluation of __builtin_elementwise_ctz with a zero value is undefined}}
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE00FF03 : 0x03FF00FE));
>From 017971c8c252bd78dba20b5d14d6996e47173e97 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Wed, 16 Jul 2025 16:46:47 +0100
Subject: [PATCH 6/6] change ub wording
---
clang/docs/LanguageExtensions.rst | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 96adf955e4a5f..31138d4905212 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -851,12 +851,12 @@ of different sizes and signs is forbidden in binary and ternary builtins.
for the comparison.
T __builtin_elementwise_clz(T x[, T y]) return the number of leading 0 bits in the first argument. If integer types
the first argument is 0 and an optional second argument is provided,
- the second argument is returned. If the first argument is 0 but only
- one argument is provided, the result is undefined.
+ the second argument is returned. It is undefined behaviour if the
+ first argument is 0 and no second argument is provided.
T __builtin_elementwise_ctz(T x[, T y]) return the number of trailing 0 bits in the first argument. If integer types
the first argument is 0 and an optional second argument is provided,
- the second argument is returned. If the first argument is 0 but only
- one argument is provided, the result is undefined.
+ the second argument is returned. It is undefined behaviour if the
+ first argument is 0 and no second argument is provided.
============================================== ====================================================================== =========================================
More information about the cfe-commits
mailing list