[clang] [Clang] Add `__builtin_experimental_vectorcompress` (PR #102476)

Lawrence Benson via cfe-commits cfe-commits at lists.llvm.org
Thu Aug 8 07:21:43 PDT 2024


https://github.com/lawben created https://github.com/llvm/llvm-project/pull/102476

This PR exposes the new `@llvm.experimental.vector.compress` intrinsic to Clang, so it can be called from C/C++. 

TODO: Add documentation and a release note if people are fine with this addition.

>From 40e7eca608e7f8cfe87fc51022dd878df67e0e12 Mon Sep 17 00:00:00 2001
From: Lawrence Benson <github at lawben.com>
Date: Thu, 8 Aug 2024 14:46:17 +0200
Subject: [PATCH 1/2] Add __builtin_vectorcompress skeleton

---
 clang/include/clang/Basic/Builtins.td       |  6 ++
 clang/lib/CodeGen/CGBuiltin.cpp             | 14 ++++
 clang/lib/Sema/SemaChecking.cpp             | 46 ++++++++++++
 clang/test/CodeGen/builtin_vectorcompress.c | 81 +++++++++++++++++++++
 4 files changed, 147 insertions(+)
 create mode 100644 clang/test/CodeGen/builtin_vectorcompress.c

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index b025a7681bfac3..666d4b2b65978a 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1218,6 +1218,12 @@ def NondetermenisticValue : Builtin {
   let Prototype = "void(...)";
 }
 
+def VectorCompress : Builtin {
+  let Spellings = ["__builtin_experimental_vectorcompress"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ElementwiseAbs : Builtin {
   let Spellings = ["__builtin_elementwise_abs"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 51d1162c6e403c..86d47a2c533151 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3773,6 +3773,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(Result);
   }
 
+  case Builtin::BI__builtin_experimental_vectorcompress: {
+    QualType VecTy = E->getArg(0)->getType();
+    Value *Vec = EmitScalarExpr(E->getArg(0));
+    Value *Mask = EmitScalarExpr(E->getArg(1));
+    Value *Passthru = E->getNumArgs() == 3 ? EmitScalarExpr(E->getArg(2)) : llvm::UndefValue::get(ConvertType(VecTy));
+
+    // Cast svbool_t to right number of elements.
+    if (VecTy->isSVESizelessBuiltinType())
+      Mask = EmitSVEPredicateCast(Mask, cast<llvm::ScalableVectorType>(Vec->getType()));
+
+    Function *F = CGM.getIntrinsic(Intrinsic::experimental_vector_compress, Vec->getType());
+    return RValue::get(Builder.CreateCall(F, {Vec, Mask, Passthru}));
+  }
+
   case Builtin::BI__builtin_elementwise_abs: {
     Value *Result;
     QualType QT = E->getArg(0)->getType();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ee143381cf4f79..68010b11759a2b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2804,6 +2804,52 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     TheCall->setType(Magnitude.get()->getType());
     break;
   }
+  case Builtin::BI__builtin_experimental_vectorcompress: {
+    unsigned NumArgs = TheCall->getNumArgs();
+    if (NumArgs < 2 || NumArgs > 3)
+      return ExprError();
+
+    Expr *VecArg = TheCall->getArg(0);
+    QualType VecTy = VecArg->getType();
+    if (!VecTy->isVectorType() && !VecTy->isSizelessVectorType()) {
+      Diag(VecArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+          << 1 << /* vector ty*/ 4 << VecTy;
+      return ExprError();
+    }
+
+    Expr *MaskArg = TheCall->getArg(1);
+    QualType MaskTy = MaskArg->getType();
+    if (!MaskTy->isVectorType() && !MaskTy->isSizelessVectorType()) {
+      Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+          << 1 << /* vector ty*/ 4 << MaskTy;
+      return ExprError();
+    }
+
+    if (VecTy->isVectorType() != MaskTy->isVectorType()) {
+      // TODO: diag
+      return ExprError();
+    }
+
+    if (VecTy->isVectorType() && VecTy->getAs<VectorType>()->getNumElements() != MaskTy->getAs<VectorType>()->getNumElements()) {
+        // TODO: diag
+        return ExprError();
+    }
+
+    // TODO: find way to compare MinKnownElements for sizeless vectors.
+    // if (VecTy->isSizelessVectorType() && VecTy->getAs<VectorType>()->getNumElements() != MaskTy->getAs<VectorType>()->getNumElements()) {}
+
+    if (NumArgs == 3) {
+      Expr *PassthruArg = TheCall->getArg(2);
+      QualType PassthruTy = PassthruArg->getType();
+      if (PassthruTy != VecTy) {
+        // TODO: diag
+        return ExprError();
+      }
+    }
+    TheCall->setType(VecTy);
+
+    break;
+  }
   case Builtin::BI__builtin_reduce_max:
   case Builtin::BI__builtin_reduce_min: {
     if (PrepareBuiltinReduceMathOneArgCall(TheCall))
diff --git a/clang/test/CodeGen/builtin_vectorcompress.c b/clang/test/CodeGen/builtin_vectorcompress.c
new file mode 100644
index 00000000000000..1eebb3461241b5
--- /dev/null
+++ b/clang/test/CodeGen/builtin_vectorcompress.c
@@ -0,0 +1,81 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -O1 -triple x86_64 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK %s
+
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve  %s -emit-llvm -o - | FileCheck --check-prefixes=SVE   %s
+
+typedef int int4 __attribute__((vector_size(16)));
+typedef float float8 __attribute__((vector_size(32)));
+typedef _Bool bitvec4 __attribute__((ext_vector_type(4)));
+typedef _Bool bitvec8 __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_builtin_vectorcompress_int4(
+// CHECK-SAME: <4 x i32> noundef [[VEC:%.*]], i8 noundef [[MASK_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> [[VEC]], <4 x i1> [[EXTRACTVEC]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+int4 test_builtin_vectorcompress_int4(int4 vec, bitvec4 mask) {
+  return __builtin_experimental_vectorcompress(vec, mask);
+}
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_builtin_vectorcompress_int4_passthru(
+// CHECK-SAME: <4 x i32> noundef [[VEC:%.*]], i8 noundef [[MASK_COERCE:%.*]], <4 x i32> noundef [[PASSTHRU:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> [[VEC]], <4 x i1> [[EXTRACTVEC]], <4 x i32> [[PASSTHRU]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+int4 test_builtin_vectorcompress_int4_passthru(int4 vec, bitvec4 mask, int4 passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru);
+}
+
+// CHECK-LABEL: define dso_local <8 x float> @test_builtin_vectorcompress_float8(
+// CHECK-SAME: ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP0:%.*]], i8 noundef [[MASK_COERCE:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> [[VEC]], <8 x i1> [[MASK1]], <8 x float> undef)
+// CHECK-NEXT:    ret <8 x float> [[TMP1]]
+float8 test_builtin_vectorcompress_float8(float8 vec, bitvec8 mask) {
+  return __builtin_experimental_vectorcompress(vec, mask);
+}
+
+// CHECK-LABEL: define dso_local <8 x float> @test_builtin_vectorcompress_float8_passthru(
+// CHECK-SAME: ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP0:%.*]], i8 noundef [[MASK_COERCE:%.*]], ptr nocapture noundef readonly byval(<8 x float>) align 32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VEC:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[MASK1:%.*]] = bitcast i8 [[MASK_COERCE]] to <8 x i1>
+// CHECK-NEXT:    [[PASSTHRU:%.*]] = load <8 x float>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> [[VEC]], <8 x i1> [[MASK1]], <8 x float> [[PASSTHRU]])
+// CHECK-NEXT:    ret <8 x float> [[TMP2]]
+float8 test_builtin_vectorcompress_float8_passthru(float8 vec, bitvec8 mask, float8 passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru);
+}
+
+#if defined(__ARM_FEATURE_SVE)
+#include <arm_sve.h>
+
+// SVE-LABEL: define dso_local <vscale x 4 x i32> @test_builtin_vectorelements_sve32(
+// SVE-SAME: <vscale x 4 x i32> [[VEC:%.*]], <vscale x 16 x i1> [[MASK:%.*]]) local_unnamed_addr
+// SVE-NEXT:  [[ENTRY:.*:]]
+// SVE-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[MASK]])
+// SVE-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.compress.nxv4i32(<vscale x 4 x i32> [[VEC]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> undef)
+// SVE-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_builtin_vectorelements_sve32(svuint32_t vec, svbool_t mask) {
+  return __builtin_experimental_vectorcompress(vec, mask);
+}
+
+// SVE-LABEL: define dso_local <vscale x 16 x i8> @test_builtin_vectorelements_sve8(
+// SVE-SAME: <vscale x 16 x i8> [[VEC:%.*]], <vscale x 16 x i1> [[MASK:%.*]], <vscale x 16 x i8> [[PASSTHRU:%.*]]) local_unnamed_addr
+// SVE-NEXT:  [[ENTRY:.*:]]
+// SVE-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.experimental.vector.compress.nxv16i8(<vscale x 16 x i8> [[VEC]], <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> [[PASSTHRU]])
+// SVE-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_builtin_vectorelements_sve8(svuint8_t vec, svbool_t mask, svuint8_t passthru) {
+  return __builtin_experimental_vectorcompress(vec, mask, passthru);
+}
+#endif
+

>From 5f02623d251769cfb505810da19b99426ab25df2 Mon Sep 17 00:00:00 2001
From: Lawrence Benson <github at lawben.com>
Date: Thu, 8 Aug 2024 16:15:31 +0200
Subject: [PATCH 2/2] Add sema error messages

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +
 clang/lib/CodeGen/CGBuiltin.cpp               | 10 ++-
 clang/lib/Sema/SemaChecking.cpp               | 64 +++++++++++--------
 clang/test/Sema/builtin_vectorcompress.c      | 30 +++++++++
 4 files changed, 75 insertions(+), 31 deletions(-)
 create mode 100644 clang/test/Sema/builtin_vectorcompress.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 5cdf36660b2a66..1e7de962dc26d4 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3387,6 +3387,8 @@ def err_typecheck_vector_lengths_not_equal : Error<
 def warn_typecheck_vector_element_sizes_not_equal : Warning<
   "vector operands do not have the same elements sizes (%0 and %1)">,
   InGroup<DiagGroup<"vec-elem-size">>, DefaultError;
+def err_typecheck_scalable_fixed_vector_mismatch : Error<
+  "vectors must both be scalable or fixed-sized vectors">;
 def err_ext_vector_component_exceeds_length : Error<
   "vector component access exceeds type %0">;
 def err_ext_vector_component_name_illegal : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 86d47a2c533151..ab2875f7572f0f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3777,13 +3777,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     QualType VecTy = E->getArg(0)->getType();
     Value *Vec = EmitScalarExpr(E->getArg(0));
     Value *Mask = EmitScalarExpr(E->getArg(1));
-    Value *Passthru = E->getNumArgs() == 3 ? EmitScalarExpr(E->getArg(2)) : llvm::UndefValue::get(ConvertType(VecTy));
+    Value *Passthru = E->getNumArgs() == 3
+                          ? EmitScalarExpr(E->getArg(2))
+                          : llvm::UndefValue::get(ConvertType(VecTy));
 
     // Cast svbool_t to right number of elements.
     if (VecTy->isSVESizelessBuiltinType())
-      Mask = EmitSVEPredicateCast(Mask, cast<llvm::ScalableVectorType>(Vec->getType()));
+      Mask = EmitSVEPredicateCast(
+          Mask, cast<llvm::ScalableVectorType>(Vec->getType()));
 
-    Function *F = CGM.getIntrinsic(Intrinsic::experimental_vector_compress, Vec->getType());
+    Function *F = CGM.getIntrinsic(Intrinsic::experimental_vector_compress,
+                                   Vec->getType());
     return RValue::get(Builder.CreateCall(F, {Vec, Mask, Passthru}));
   }
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 68010b11759a2b..ca9f638c960e0a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2806,48 +2806,56 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_experimental_vectorcompress: {
     unsigned NumArgs = TheCall->getNumArgs();
-    if (NumArgs < 2 || NumArgs > 3)
-      return ExprError();
+    if (NumArgs < 2)
+      return Diag(TheCall->getEndLoc(),
+                  diag::err_typecheck_call_too_few_args_at_least)
+             << /*function*/ 0 << /*at least*/ 2 << /*got*/ NumArgs
+             << /*is non object*/ 0;
+
+    if (NumArgs > 3)
+      return Diag(TheCall->getEndLoc(),
+                  diag::err_typecheck_call_too_many_args_at_most)
+             << /*function*/ 0 << /*at most*/ 3 << /*got*/ NumArgs
+             << /*is non object*/ 0;
 
     Expr *VecArg = TheCall->getArg(0);
     QualType VecTy = VecArg->getType();
-    if (!VecTy->isVectorType() && !VecTy->isSizelessVectorType()) {
-      Diag(VecArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
-          << 1 << /* vector ty*/ 4 << VecTy;
-      return ExprError();
-    }
+    if (!VecTy->isVectorType() && !VecTy->isSizelessVectorType())
+      return Diag(VecArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+             << 1 << /* vector ty*/ 4 << VecTy;
 
     Expr *MaskArg = TheCall->getArg(1);
     QualType MaskTy = MaskArg->getType();
-    if (!MaskTy->isVectorType() && !MaskTy->isSizelessVectorType()) {
-      Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
-          << 1 << /* vector ty*/ 4 << MaskTy;
-      return ExprError();
-    }
-
-    if (VecTy->isVectorType() != MaskTy->isVectorType()) {
-      // TODO: diag
-      return ExprError();
-    }
-
-    if (VecTy->isVectorType() && VecTy->getAs<VectorType>()->getNumElements() != MaskTy->getAs<VectorType>()->getNumElements()) {
-        // TODO: diag
-        return ExprError();
-    }
+    if (!MaskTy->isVectorType() && !MaskTy->isSizelessVectorType())
+      return Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+             << 2 << /* vector ty*/ 4 << MaskTy;
+
+    if (VecTy->isVectorType() != MaskTy->isVectorType())
+      return Diag(MaskArg->getBeginLoc(),
+                  diag::err_typecheck_scalable_fixed_vector_mismatch);
+
+    if (VecTy->isVectorType() &&
+        VecTy->getAs<VectorType>()->getNumElements() !=
+            MaskTy->getAs<VectorType>()->getNumElements())
+      return Diag(VecArg->getBeginLoc(),
+                  diag::err_typecheck_vector_lengths_not_equal)
+             << VecTy->getAs<VectorType>()->getNumElements()
+             << MaskTy->getAs<VectorType>()->getNumElements();
 
     // TODO: find way to compare MinKnownElements for sizeless vectors.
-    // if (VecTy->isSizelessVectorType() && VecTy->getAs<VectorType>()->getNumElements() != MaskTy->getAs<VectorType>()->getNumElements()) {}
+    // if (VecTy->isSizelessVectorType() &&
+    // VecTy->getAs<VectorType>()->getNumElements() !=
+    // MaskTy->getAs<VectorType>()->getNumElements()) {}
 
     if (NumArgs == 3) {
       Expr *PassthruArg = TheCall->getArg(2);
       QualType PassthruTy = PassthruArg->getType();
-      if (PassthruTy != VecTy) {
-        // TODO: diag
-        return ExprError();
-      }
+      if (PassthruTy != VecTy)
+        return Diag(PassthruArg->getBeginLoc(),
+                    diag::err_typecheck_call_different_arg_types)
+               << VecTy << PassthruTy;
     }
     TheCall->setType(VecTy);
-
     break;
   }
   case Builtin::BI__builtin_reduce_max:
diff --git a/clang/test/Sema/builtin_vectorcompress.c b/clang/test/Sema/builtin_vectorcompress.c
new file mode 100644
index 00000000000000..5b55a4081c1660
--- /dev/null
+++ b/clang/test/Sema/builtin_vectorcompress.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple aarch64 -fsyntax-only -verify -disable-llvm-passes %s
+
+typedef int int4 __attribute__((vector_size(16)));
+typedef float float8 __attribute__((vector_size(32)));
+typedef _Bool bitvec4 __attribute__((ext_vector_type(4)));
+typedef _Bool bitvec8 __attribute__((ext_vector_type(8)));
+
+void test_builtin_vectorelements(int4 vec1, float8 vec2, bitvec4 mask1, bitvec8 mask2, int4 passthru1, float8 passthru2) {
+  // wrong number of arguments
+  __builtin_experimental_vectorcompress(vec1); // expected-error {{too few arguments to function call}}
+  __builtin_experimental_vectorcompress(vec1, mask2, passthru1, passthru1); // expected-error {{too many arguments to function call}}
+
+  // valid
+  (void) __builtin_experimental_vectorcompress(vec1, mask1);
+  (void) __builtin_experimental_vectorcompress(vec1, mask1, passthru1);
+  (void) __builtin_experimental_vectorcompress(vec2, mask2);
+  (void) __builtin_experimental_vectorcompress(vec2, mask2, passthru2);
+
+  // type mismatch
+  __builtin_experimental_vectorcompress(vec1, mask2); // expected-error {{vector operands do not have the same number of elements}}
+  __builtin_experimental_vectorcompress(vec2, mask1); // expected-error {{vector operands do not have the same number of elements}}
+  __builtin_experimental_vectorcompress(vec1, mask1, passthru2); // expected-error {{arguments are of different types}}
+
+  // invalid types
+  int a;
+  __builtin_experimental_vectorcompress(a, mask1, passthru1); // expected-error {{1st argument must be a vector type (was 'int')}}
+  __builtin_experimental_vectorcompress(vec1, a, passthru1); // expected-error {{2nd argument must be a vector type (was 'int')}}
+  __builtin_experimental_vectorcompress(vec1, mask1, a); // expected-error {{arguments are of different types}}
+}
+



More information about the cfe-commits mailing list