[clang] c3bf73b - [clang] Add elementwise fshl/fshr builtins (#153113)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Aug 12 04:57:59 PDT 2025
Author: Chaitanya Koparkar
Date: 2025-08-12T20:57:55+09:00
New Revision: c3bf73bc4ade26b8b9dd5080ce7bccd88037cfd0
URL: https://github.com/llvm/llvm-project/commit/c3bf73bc4ade26b8b9dd5080ce7bccd88037cfd0
DIFF: https://github.com/llvm/llvm-project/commit/c3bf73bc4ade26b8b9dd5080ce7bccd88037cfd0.diff
LOG: [clang] Add elementwise fshl/fshr builtins (#153113)
This patch implements `__builtin_elementwise_fshl` and
`__builtin_elementwise_fshr` builtins.
These map to the fshl/fshr intrinsics described here:
- https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic
- https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic
Fixes https://github.com/llvm/llvm-project/issues/152555.
Added:
Modified:
clang/docs/LanguageExtensions.rst
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/Builtins.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/builtins-elementwise-math.c
clang/test/Sema/builtins-elementwise-math.c
Removed:
################################################################################
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 5f2e57b5db5df..eef3d0c4ccb9d 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -860,6 +860,15 @@ of different sizes and signs is forbidden in binary and ternary builtins.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
for the comparison.
+ T __builtin_elementwise_fshl(T x, T y, T z)    perform a funnel shift left. Concatenate x and y (x is the most        integer types
+                                                significant bits of the wide value), the combined value is shifted
+                                                left by z, and the most significant bits are extracted to produce
+                                                a result that is the same size as the original arguments.
+
+ T __builtin_elementwise_fshr(T x, T y, T z)    perform a funnel shift right. Concatenate x and y (x is the most       integer types
+                                                significant bits of the wide value), the combined value is shifted
+                                                right by z, and the least significant bits are extracted to produce
+                                                a result that is the same size as the original arguments.
============================================== ====================================================================== =========================================
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 29eb18bcf85fd..0e7aa3a56d0ba 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -113,6 +113,8 @@ C23 Feature Support
Non-comprehensive list of changes in this release
-------------------------------------------------
+- Added ``__builtin_elementwise_fshl`` and ``__builtin_elementwise_fshr``.
+
- Added ``__builtin_elementwise_minnumnum`` and ``__builtin_elementwise_maxnumnum``.
- Trapping UBSan (e.g. ``-fsanitize-trap=undefined``) now emits a string describing the reason for
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 0181674da89ac..84206cf8b368b 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1514,6 +1514,18 @@ def ElementwiseSubSat : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseFshl : Builtin {
+ let Spellings = ["__builtin_elementwise_fshl"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseFshr : Builtin {
+ let Spellings = ["__builtin_elementwise_fshr"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ReduceMax : Builtin {
let Spellings = ["__builtin_reduce_max"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 071667ac772e8..a1f2a874f010d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4030,6 +4030,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_fma:
return RValue::get(
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma));
+ case Builtin::BI__builtin_elementwise_fshl:
+ return RValue::get(
+ emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl));
+ case Builtin::BI__builtin_elementwise_fshr:
+ return RValue::get(
+ emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr));
+
case Builtin::BI__builtin_elementwise_add_sat:
case Builtin::BI__builtin_elementwise_sub_sat: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 9ecee18661340..907740374dbfe 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3031,6 +3031,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
EltwiseBuiltinArgTyRestriction::IntegerTy))
return ExprError();
break;
+ case Builtin::BI__builtin_elementwise_fshl:
+ case Builtin::BI__builtin_elementwise_fshr:
+ if (BuiltinElementwiseTernaryMath(
+ TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy))
+ return ExprError();
+ break;
case Builtin::BI__builtin_elementwise_min:
case Builtin::BI__builtin_elementwise_max:
if (BuiltinElementwiseMath(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 9fd12f53da333..bb5d0351db1a2 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -1179,3 +1179,89 @@ void test_builtin_elementwise_fma(float f32, double f64,
half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);
}
+
+void test_builtin_elementwise_fshl(long long int i1, long long int i2,
+ long long int i3, unsigned short us1,
+ unsigned short us2, unsigned short us3,
+ char c1, char c2, char c3,
+ unsigned char uc1, unsigned char uc2,
+ unsigned char uc3, si8 vi1, si8 vi2,
+ si8 vi3, u4 vu1, u4 vu2, u4 vu3) {
+ // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr
+ // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr
+ // CHECK-NEXT: [[I3:%.+]] = load i64, ptr %i3.addr
+ // CHECK-NEXT: [[I4:%.+]] = call i64 @llvm.fshl.i64(i64 [[I1]], i64 [[I2]], i64 [[I3]])
+ // CHECK-NEXT: store i64 [[I4]], ptr %tmp_lli_l
+ // CHECK-NEXT: [[I5:%.+]] = load i64, ptr %i1.addr
+ // CHECK-NEXT: [[I6:%.+]] = load i64, ptr %i2.addr
+ // CHECK-NEXT: [[I7:%.+]] = load i64, ptr %i3.addr
+ // CHECK-NEXT: [[I8:%.+]] = call i64 @llvm.fshr.i64(i64 [[I5]], i64 [[I6]], i64 [[I7]])
+ // CHECK-NEXT: store i64 [[I8]], ptr %tmp_lli_r
+ long long int tmp_lli_l = __builtin_elementwise_fshl(i1, i2, i3);
+ long long int tmp_lli_r = __builtin_elementwise_fshr(i1, i2, i3);
+
+ // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr
+ // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr
+ // CHECK-NEXT: [[US3:%.+]] = load i16, ptr %us3.addr
+ // CHECK-NEXT: [[US4:%.+]] = call i16 @llvm.fshl.i16(i16 [[US1]], i16 [[US2]], i16 [[US3]])
+ // CHECK-NEXT: store i16 [[US4]], ptr %tmp_usi_l
+ // CHECK-NEXT: [[US5:%.+]] = load i16, ptr %us1.addr
+ // CHECK-NEXT: [[US6:%.+]] = load i16, ptr %us2.addr
+ // CHECK-NEXT: [[US7:%.+]] = load i16, ptr %us3.addr
+ // CHECK-NEXT: [[US8:%.+]] = call i16 @llvm.fshr.i16(i16 [[US5]], i16 [[US6]], i16 [[US7]])
+ // CHECK-NEXT: store i16 [[US8]], ptr %tmp_usi_r
+ unsigned short tmp_usi_l = __builtin_elementwise_fshl(us1, us2, us3);
+ unsigned short tmp_usi_r = __builtin_elementwise_fshr(us1, us2, us3);
+
+ // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr
+ // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr
+ // CHECK-NEXT: [[C3:%.+]] = load i8, ptr %c3.addr
+ // CHECK-NEXT: [[C4:%.+]] = call i8 @llvm.fshl.i8(i8 [[C1]], i8 [[C2]], i8 [[C3]])
+ // CHECK-NEXT: store i8 [[C4]], ptr %tmp_c_l
+ // CHECK-NEXT: [[C5:%.+]] = load i8, ptr %c1.addr
+ // CHECK-NEXT: [[C6:%.+]] = load i8, ptr %c2.addr
+ // CHECK-NEXT: [[C7:%.+]] = load i8, ptr %c3.addr
+ // CHECK-NEXT: [[C8:%.+]] = call i8 @llvm.fshr.i8(i8 [[C5]], i8 [[C6]], i8 [[C7]])
+ // CHECK-NEXT: store i8 [[C8]], ptr %tmp_c_r
+ char tmp_c_l = __builtin_elementwise_fshl(c1, c2, c3);
+ char tmp_c_r = __builtin_elementwise_fshr(c1, c2, c3);
+
+ // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr
+ // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr
+ // CHECK-NEXT: [[UC3:%.+]] = load i8, ptr %uc3.addr
+ // CHECK-NEXT: [[UC4:%.+]] = call i8 @llvm.fshl.i8(i8 [[UC1]], i8 [[UC2]], i8 [[UC3]])
+ // CHECK-NEXT: store i8 [[UC4]], ptr %tmp_uc_l
+ // CHECK-NEXT: [[UC5:%.+]] = load i8, ptr %uc1.addr
+ // CHECK-NEXT: [[UC6:%.+]] = load i8, ptr %uc2.addr
+ // CHECK-NEXT: [[UC7:%.+]] = load i8, ptr %uc3.addr
+ // CHECK-NEXT: [[UC8:%.+]] = call i8 @llvm.fshr.i8(i8 [[UC5]], i8 [[UC6]], i8 [[UC7]])
+ // CHECK-NEXT: store i8 [[UC8]], ptr %tmp_uc_r
+ unsigned char tmp_uc_l = __builtin_elementwise_fshl(uc1, uc2, uc3);
+ unsigned char tmp_uc_r = __builtin_elementwise_fshr(uc1, uc2, uc3);
+
+ // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr
+ // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr
+ // CHECK-NEXT: [[VI3:%.+]] = load <8 x i16>, ptr %vi3.addr
+ // CHECK-NEXT: [[VI4:%.+]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]], <8 x i16> [[VI3]])
+ // CHECK-NEXT: store <8 x i16> [[VI4]], ptr %tmp_vi_l
+ // CHECK-NEXT: [[VI5:%.+]] = load <8 x i16>, ptr %vi1.addr
+ // CHECK-NEXT: [[VI6:%.+]] = load <8 x i16>, ptr %vi2.addr
+ // CHECK-NEXT: [[VI7:%.+]] = load <8 x i16>, ptr %vi3.addr
+ // CHECK-NEXT: [[VI8:%.+]] = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> [[VI5]], <8 x i16> [[VI6]], <8 x i16> [[VI7]])
+ // CHECK-NEXT: store <8 x i16> [[VI8]], ptr %tmp_vi_r
+ si8 tmp_vi_l = __builtin_elementwise_fshl(vi1, vi2, vi3);
+ si8 tmp_vi_r = __builtin_elementwise_fshr(vi1, vi2, vi3);
+
+ // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr
+ // CHECK-NEXT: [[VU3:%.+]] = load <4 x i32>, ptr %vu3.addr
+ // CHECK-NEXT: [[VU4:%.+]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]], <4 x i32> [[VU3]])
+ // CHECK-NEXT: store <4 x i32> [[VU4]], ptr %tmp_vu_l
+ // CHECK-NEXT: [[VU5:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: [[VU6:%.+]] = load <4 x i32>, ptr %vu2.addr
+ // CHECK-NEXT: [[VU7:%.+]] = load <4 x i32>, ptr %vu3.addr
+ // CHECK-NEXT: [[VU8:%.+]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[VU5]], <4 x i32> [[VU6]], <4 x i32> [[VU7]])
+ // CHECK-NEXT: store <4 x i32> [[VU8]], ptr %tmp_vu_r
+ u4 tmp_vu_l = __builtin_elementwise_fshl(vu1, vu2, vu3);
+ u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 8548d3be8c44a..a80ff4bed4faf 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -1294,6 +1294,42 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16,
// expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}}
}
+void test_builtin_elementwise_fsh(int i32, int2 v2i32, short i16, int3 v3i32,
+ double f64, float f32, float2 v2f32) {
+ i32 = __builtin_elementwise_fshl();
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
+
+ i32 = __builtin_elementwise_fshr();
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
+
+ i32 = __builtin_elementwise_fshl(i32, i32);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+
+ i32 = __builtin_elementwise_fshr(i32, i32);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+
+ i32 = __builtin_elementwise_fshl(i32, i32, i16);
+ // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
+
+ i16 = __builtin_elementwise_fshr(i16, i32, i16);
+ // expected-error@-1 {{arguments are of different types ('short' vs 'int')}}
+
+ f32 = __builtin_elementwise_fshl(f32, f32, f32);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float')}}
+
+ f64 = __builtin_elementwise_fshr(f64, f64, f64);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'double')}}
+
+ v2i32 = __builtin_elementwise_fshl(v2i32, v2i32, v2f32);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float2' (vector of 2 'float' values))}}
+
+ v2i32 = __builtin_elementwise_fshr(v2i32, v2i32, v3i32);
+ // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int3' (vector of 3 'int' values))}}
+
+ v3i32 = __builtin_elementwise_fshl(v3i32, v3i32, v2i32);
+ // expected-error@-1 {{arguments are of different types ('int3' (vector of 3 'int' values) vs 'int2' (vector of 2 'int' values))}}
+}
+
typedef struct {
float3 b;
} struct_float3;
More information about the cfe-commits
mailing list