[clang] 8a92c45 - [Clang] Add integer mul reduction builtin

Mon May 9 04:13:06 PDT 2022

Author: Simon Pilgrim
Date: 2022-05-09T12:12:53+01:00
New Revision: 8a92c45e07dc81c83ca3afda3971d98c512429d4

URL: https://github.com/llvm/llvm-project/commit/8a92c45e07dc81c83ca3afda3971d98c512429d4
DIFF: https://github.com/llvm/llvm-project/commit/8a92c45e07dc81c83ca3afda3971d98c512429d4.diff

LOG: [Clang] Add integer mul reduction builtin

Similar to the existing bitwise reduction builtins, this lowers to a llvm.vector.reduce.mul intrinsic call.

For other reductions, we've tried to share builtins for float/integer vectors, but the fmul reduction intrinsic also takes a starting value argument and can perform either unordered or serialized reductions, but not reduction trees as specified for the builtins. However we end up addressing fmul support, this shouldn't affect the integer case.

Differential Revision: https://reviews.llvm.org/D117829

Added: 
    

Modified: 
    clang/docs/LanguageExtensions.rst
    clang/include/clang/Basic/Builtins.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/builtins-reduction-math.c
    clang/test/Sema/builtins-reduction-math.c

Removed: 
    


################################################################################
diff  --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index bc90f9cf7480b..3cdac02d8d3fb 100644

--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -647,6 +647,7 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
                                          is a NaN, return the other argument. If both arguments are
                                          NaNs, fmax() return a NaN.
  ET __builtin_reduce_add(VT a)           \+                                                               integer and floating point types
+ ET __builtin_reduce_mul(VT a)           *                                                                integer and floating point types
  ET __builtin_reduce_and(VT a)           &                                                                integer types
  ET __builtin_reduce_or(VT a)            \|                                                               integer types
  ET __builtin_reduce_xor(VT a)           ^                                                                integer types

diff  --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index ad55fdbc7c62a..e9b8ac6c602a4 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -664,6 +664,7 @@ BUILTIN(__builtin_reduce_xor, "v.", "nct")
 BUILTIN(__builtin_reduce_or, "v.", "nct")
 BUILTIN(__builtin_reduce_and, "v.", "nct")
 BUILTIN(__builtin_reduce_add, "v.", "nct")
+BUILTIN(__builtin_reduce_mul, "v.", "nct")
 
 BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a76677ecc2b2c..bd689b2063c69 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3146,6 +3146,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_reduce_add:
     return RValue::get(emitUnaryBuiltin(
         *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
+  case Builtin::BI__builtin_reduce_mul:
+    return RValue::get(emitUnaryBuiltin(
+        *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
   case Builtin::BI__builtin_reduce_xor:
     return RValue::get(emitUnaryBuiltin(
         *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1ef8a6ae833a7..31459af5033f8 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2596,8 +2596,9 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   }
 
   // These builtins support vectors of integers only.
-  // TODO: ADD should support floating-point types.
+  // TODO: ADD/MUL should support floating-point types.
   case Builtin::BI__builtin_reduce_add:
+  case Builtin::BI__builtin_reduce_mul:
   case Builtin::BI__builtin_reduce_xor:
   case Builtin::BI__builtin_reduce_or:
   case Builtin::BI__builtin_reduce_and: {

diff  --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c
index 2988a318eb6c6..78ec794b2d9e0 100644
--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@@ -80,6 +80,28 @@ void test_builtin_reduce_add(si8 vi1, u4 vu1) {
   unsigned long long r5 = __builtin_reduce_add(cvu1);
 }
 
+void test_builtin_reduce_mul(si8 vi1, u4 vu1) {
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_mul(vi1);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_mul(vu1);
+
+  // CHECK:      [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
+  // CHECK-NEXT: [[RDX1:%.+]] = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[CVI1]])
+  // CHECK-NEXT: sext i16 [[RDX1]] to i32
+  const si8 cvi1 = vi1;
+  int r4 = __builtin_reduce_mul(cvi1);
+
+  // CHECK:      [[CVU1:%.+]] = load <4 x i32>, <4 x i32>* %cvu1, align 16
+  // CHECK-NEXT: [[RDX2:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[CVU1]])
+  // CHECK-NEXT: zext i32 [[RDX2]] to i64
+  const u4 cvu1 = vu1;
+  unsigned long long r5 = __builtin_reduce_mul(cvu1);
+}
+
 void test_builtin_reduce_xor(si8 vi1, u4 vu1) {
 
   // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16

diff  --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c
index 10c95a1e531bb..9d5eed75eb814 100644
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@@ -53,6 +53,23 @@ void test_builtin_reduce_add(int i, float4 v, int3 iv) {
   // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
 }
 
+void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_mul(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_mul();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_mul(iv, iv);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_mul(i);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
+
+  i = __builtin_reduce_mul(v);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+}
+
 void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
   struct Foo s = __builtin_reduce_xor(iv);
   // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}