[clang] [clang][bytecode] Support __builtin_reduce_add (PR #117672)

Sun Dec 1 01:02:54 PST 2024

https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/117672

>From 1bb49850c392cdc535849f43ad14b02cbbb9dcc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 26 Nov 2024 06:33:12 +0100
Subject: [PATCH] [clang][bytecode] Support __builtin_reduce_add

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp      | 48 ++++++++++++++++++-
 clang/test/AST/ByteCode/builtin-functions.cpp | 45 +++++++++++++++++
 2 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b450d8263c30bf..dc7d23f81d3f4e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -89,13 +89,14 @@ static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) {
   std::optional<PrimType> T = S.getContext().classify(QT);
   assert(T);
 
+  unsigned BitWidth = S.getASTContext().getTypeSize(QT);
   if (QT->isSignedIntegerOrEnumerationType()) {
     int64_t V = Val.getSExtValue();
-    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V)); });
+    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
   } else {
     assert(QT->isUnsignedIntegerOrEnumerationType());
     uint64_t V = Val.getZExtValue();
-    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V)); });
+    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
   }
 }
 
@@ -137,6 +138,8 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result,
     RET_CASE(PT_Uint32);
     RET_CASE(PT_Sint64);
     RET_CASE(PT_Uint64);
+    RET_CASE(PT_IntAP);
+    RET_CASE(PT_IntAPS);
   default:
     llvm_unreachable("Unsupported return type for builtin function");
   }
@@ -1684,6 +1687,42 @@ static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+/// Sums the elements of the vector on top of the stack (__builtin_reduce_add).
+/// Pushes the sum; on overflow calls handleOverflow() and returns false.
+static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
+                                          const InterpFrame *Frame,
+                                          const Function *Func,
+                                          const CallExpr *Call) {
+  const Pointer &Arg = S.Stk.peek<Pointer>();
+  assert(Arg.getFieldDesc()->isPrimitiveArray());
+
+  if (Func->getBuiltinID() == Builtin::BI__builtin_reduce_add) {
+    QualType ElemType = Arg.getFieldDesc()->getElemQualType();
+    assert(Call->getType() == ElemType);
+    PrimType ElemT = *S.getContext().classify(ElemType);
+    unsigned NumElems = Arg.getNumElems();
+    INT_TYPE_SWITCH(ElemT, {
+      T Sum = Arg.atIndex(0).deref<T>();
+      unsigned BitWidth = Sum.bitWidth();
+      for (unsigned I = 1; I != NumElems; ++I) {
+        T Elem = Arg.atIndex(I).deref<T>();
+        // T::add() stores into Sum, which may be wrapped on overflow; keep the
+        // left operand so the reported value is the true sum.
+        T Prev = Sum;
+        if (T::add(Prev, Elem, BitWidth, &Sum)) {
+          APSInt Wide = Prev.toAPSInt(BitWidth + 1);
+          (void)handleOverflow(S, OpPC, Wide + Elem.toAPSInt(BitWidth + 1));
+          return false;
+        }
+      }
+      pushInteger(S, Sum, Call->getType());
+    });
+    return true;
+  }
+
+  llvm_unreachable("Unsupported vector reduce builtin");
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
                       const CallExpr *Call, uint32_t BuiltinID) {
   const InterpFrame *Frame = S.Current;
@@ -2130,6 +2169,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
       return false;
     break;
 
+  case Builtin::BI__builtin_reduce_add:
+    if (!interp__builtin_vector_reduce(S, OpPC, Frame, F, Call))
+      return false;
+    break;
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index b5d334178f8213..972d39ca509615 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -990,3 +990,48 @@ namespace BuiltinInImplicitCtor {
   } Foo;
   static_assert(Foo.a == 0, "");
 }
+
+// Vector types for the tests below; __vector_size__ is the size in bytes.
+typedef double vector4double __attribute__((__vector_size__(32)));
+typedef float vector4float __attribute__((__vector_size__(16)));
+typedef long long vector4long __attribute__((__vector_size__(32)));
+typedef int vector4int __attribute__((__vector_size__(16)));
+typedef unsigned long long vector4ulong __attribute__((__vector_size__(32)));
+typedef unsigned int vector4uint __attribute__((__vector_size__(16)));
+typedef short vector4short __attribute__((__vector_size__(8)));
+typedef char vector4char __attribute__((__vector_size__(4)));
+typedef double vector8double __attribute__((__vector_size__(64)));
+typedef float vector8float __attribute__((__vector_size__(32)));
+typedef long long vector8long __attribute__((__vector_size__(64)));
+typedef int vector8int __attribute__((__vector_size__(32)));
+typedef short vector8short __attribute__((__vector_size__(16)));
+typedef char vector8char __attribute__((__vector_size__(8)));
+
+namespace RecuceAdd {
+  static_assert(__builtin_reduce_add((vector4char){}) == 0);
+  static_assert(__builtin_reduce_add((vector4char){1, 2, 3, 4}) == 10);
+  static_assert(__builtin_reduce_add((vector4short){10, 20, 30, 40}) == 100);
+  static_assert(__builtin_reduce_add((vector4int){100, 200, 300, 400}) == 1000);
+  static_assert(__builtin_reduce_add((vector4long){1000, 2000, 3000, 4000}) == 10000);
+  constexpr int reduceAddInt1 = __builtin_reduce_add((vector4int){~(1 << (sizeof(int) * 8 - 1)), 0, 0, 1});
+  // both-error@-1 {{must be initialized by a constant expression}} \
+  // both-note@-1 {{outside the range of representable values of type 'int'}}
+  constexpr long long reduceAddLong1 = __builtin_reduce_add((vector4long){~(1LL << (sizeof(long long) * 8 - 1)), 0, 0, 1});
+  // both-error@-1 {{must be initialized by a constant expression}} \
+  // both-note@-1 {{outside the range of representable values of type 'long long'}}
+  constexpr int reduceAddInt2 = __builtin_reduce_add((vector4int){(1 << (sizeof(int) * 8 - 1)), 0, 0, -1});
+  // both-error@-1 {{must be initialized by a constant expression}} \
+  // both-note@-1 {{outside the range of representable values of type 'int'}}
+  constexpr long long reduceAddLong2 = __builtin_reduce_add((vector4long){(1LL << (sizeof(long long) * 8 - 1)), 0, 0, -1});
+  // both-error@-1 {{must be initialized by a constant expression}} \
+  // both-note@-1 {{outside the range of representable values of type 'long long'}}
+  static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0);
+  static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0);
+
+  // 128-bit elements; __vector_size__ is in bytes, so 4 elements = 64 bytes.
+#ifdef __SIZEOF_INT128__
+  typedef __int128 v4i128 __attribute__((__vector_size__(sizeof(__int128) * 4)));
+  constexpr __int128 reduceAddInt3 = __builtin_reduce_add((v4i128){});
+  static_assert(reduceAddInt3 == 0);
+#endif
+}