[clang] 9d84f8d - clang: Add __builtin_elementwise_rint and nearbyint

Fri Jun 23 16:52:11 PDT 2023

Author: Matt Arsenault
Date: 2023-06-23T19:52:06-04:00
New Revision: 9d84f8dc948b3188fdec9a1a080eb6d845c2082d

URL: https://github.com/llvm/llvm-project/commit/9d84f8dc948b3188fdec9a1a080eb6d845c2082d
DIFF: https://github.com/llvm/llvm-project/commit/9d84f8dc948b3188fdec9a1a080eb6d845c2082d.diff

LOG: clang: Add __builtin_elementwise_rint and nearbyint

These are basically the same thing and only differ for strictfp,
so add both for future proofing. Note all the elementwise functions are
currently broken for strictfp, and use non-constrained ops. Add a test
that demonstrates this, but doesn't attempt to fix it.

Added: 
    clang/test/CodeGen/strictfp-elementwise-bulitins.cpp

Modified: 
    clang/docs/LanguageExtensions.rst
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/Builtins.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/builtins-elementwise-math.c
    clang/test/Sema/builtins-elementwise-math.c
    clang/test/SemaCXX/builtins-elementwise-math.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index bda55c007e6ae..c78a1231513e9 100644

--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -651,6 +651,19 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
                                              exceptions.
  T __builtin_elementwise_trunc(T x)          return the integral value nearest to but no larger in            floating point types
                                              magnitude than x
+
+  T __builtin_elementwise_nearbyint(T x)     round x to the nearest  integer value in floating point format,      floating point types
+                                             rounding according to the current rounding direction.
+                                             May not raise the inexact floating-point exception. This is
+                                             treated the same as ``__builtin_elementwise_rint`` unless
+                                             :ref:`FENV_ACCESS is enabled <floating-point-environment>`.
+
+ T __builtin_elementwise_rint(T x)           round x to the nearest  integer value in floating point format,      floating point types
+                                             rounding according to the current rounding
+                                             direction. May raise floating-point exceptions. This is treated
+                                             the same as ``__builtin_elementwise_nearbyint`` unless
+                                             :ref:`FENV_ACCESS is enabled <floating-point-environment>`.
+
  T __builtin_elementwise_canonicalize(T x)   return the platform specific canonical encoding                  floating point types
                                              of a floating-point number
  T __builtin_elementwise_copysign(T x, T y)  return the magnitude of x with the sign of y.                    floating point types

diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 117449f4e2fad..ff9f8da044db5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -231,7 +231,12 @@ Non-comprehensive list of changes in this release
 - Added ``__builtin_elementwise_round`` for  builtin for floating
   point types. This allows access to ``llvm.round`` for
   arbitrary floating-point and vector of floating-point types.
-
+- Added ``__builtin_elementwise_rint`` for floating point types. This
+  allows access to ``llvm.rint`` for arbitrary floating-point and
+  vector of floating-point types.
+- Added ``__builtin_elementwise_nearbyint`` for floating point
+  types. This allows access to ``llvm.nearbyint`` for arbitrary
+  floating-point and vector of floating-point types.
 
 New Compiler Flags
 ------------------

diff  --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index aa0076aab951c..c0e045865e2c2 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -688,6 +688,8 @@ BUILTIN(__builtin_elementwise_log2, "v.", "nct")
 BUILTIN(__builtin_elementwise_log10, "v.", "nct")
 BUILTIN(__builtin_elementwise_roundeven, "v.", "nct")
 BUILTIN(__builtin_elementwise_round, "v.", "nct")
+BUILTIN(__builtin_elementwise_rint, "v.", "nct")
+BUILTIN(__builtin_elementwise_nearbyint, "v.", "nct")
 BUILTIN(__builtin_elementwise_sin, "v.", "nct")
 BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
 BUILTIN(__builtin_elementwise_canonicalize, "v.", "nct")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bfd9c4d434bb7..bf3e5a54f67b9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3197,6 +3197,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_round:
     return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
                                         "elt.round"));
+  case Builtin::BI__builtin_elementwise_rint:
+    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
+                                        "elt.rint"));
+  case Builtin::BI__builtin_elementwise_nearbyint:
+    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
+                                        "elt.nearbyint"));
   case Builtin::BI__builtin_elementwise_sin:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index b652f25cd039c..e713abaa617e3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2637,6 +2637,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_log10:
   case Builtin::BI__builtin_elementwise_roundeven:
   case Builtin::BI__builtin_elementwise_round:
+  case Builtin::BI__builtin_elementwise_rint:
+  case Builtin::BI__builtin_elementwise_nearbyint:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_trunc:
   case Builtin::BI__builtin_elementwise_canonicalize: {

diff  --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 4598faaa84163..729fd518c4488 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -484,6 +484,38 @@ void test_builtin_elementwise_round(float f1, float f2, double d1, double d2,
   vf2 = __builtin_elementwise_round(vf1);
 }
 
+void test_builtin_elementwise_rint(float f1, float f2, double d1, double d2,
+                                   float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_rint(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.rint.f32(float [[F1]])
+  f2 = __builtin_elementwise_rint(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.rint.f64(double [[D1]])
+  d2 = __builtin_elementwise_rint(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.rint.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_rint(vf1);
+}
+
+void test_builtin_elementwise_nearbyint(float f1, float f2, double d1, double d2,
+                                        float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_nearbyint(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.nearbyint.f32(float [[F1]])
+  f2 = __builtin_elementwise_nearbyint(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.nearbyint.f64(double [[D1]])
+  d2 = __builtin_elementwise_nearbyint(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_nearbyint(vf1);
+}
+
 void test_builtin_elementwise_sin(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2) {
   // CHECK-LABEL: define void @test_builtin_elementwise_sin(

diff  --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
new file mode 100644
index 0000000000000..a2062f31da154
--- /dev/null
+++ b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -0,0 +1,219 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -frounding-math -ffp-exception-behavior=strict -O2 -emit-llvm -o - %s | FileCheck %s
+
+// FIXME: This demonstrates elementwise builtins are broken for strictfp and
+// produce unconstrained intrinsics
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+
+// Sanity check we're getting constrained ops for a non-builtin.
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z11strict_faddDv4_fS_
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = tail call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4:[0-9]+]]
+// CHECK-NEXT:    ret <4 x float> [[ADD]]
+//
+float4 strict_fadd(float4 a, float4 b) {
+  return a + b;
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_absDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_ABS]]
+//
+float4 strict_elementwise_abs(float4 a) {
+  return __builtin_elementwise_abs(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_maxDv4_fS_
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_MAX:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_MAX]]
+//
+float4 strict_elementwise_max(float4 a, float4 b) {
+  return __builtin_elementwise_max(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_minDv4_fS_
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_MIN:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_MIN]]
+//
+float4 strict_elementwise_min(float4 a, float4 b) {
+  return __builtin_elementwise_min(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_ceilDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_CEIL:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_CEIL]]
+//
+float4 strict_elementwise_ceil(float4 a) {
+  return __builtin_elementwise_ceil(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_cosDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_COS:%.*]] = tail call <4 x float> @llvm.cos.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_COS]]
+//
+float4 strict_elementwise_cos(float4 a) {
+  return __builtin_elementwise_cos(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_expDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_EXP:%.*]] = tail call <4 x float> @llvm.exp.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_EXP]]
+//
+float4 strict_elementwise_exp(float4 a) {
+  return __builtin_elementwise_exp(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_exp2Dv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_EXP2:%.*]] = tail call <4 x float> @llvm.exp2.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_EXP2]]
+//
+float4 strict_elementwise_exp2(float4 a) {
+  return __builtin_elementwise_exp2(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_floorDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_FLOOR:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_FLOOR]]
+//
+float4 strict_elementwise_floor(float4 a) {
+  return __builtin_elementwise_floor(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_logDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_LOG:%.*]] = tail call <4 x float> @llvm.log.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_LOG]]
+//
+float4 strict_elementwise_log(float4 a) {
+  return __builtin_elementwise_log(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_log2Dv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_LOG2]]
+//
+float4 strict_elementwise_log2(float4 a) {
+  return __builtin_elementwise_log2(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_log10Dv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_LOG2]]
+//
+float4 strict_elementwise_log10(float4 a) {
+  return __builtin_elementwise_log2(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z28strict_elementwise_roundevenDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_ROUNDEVEN:%.*]] = tail call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_ROUNDEVEN]]
+//
+float4 strict_elementwise_roundeven(float4 a) {
+  return __builtin_elementwise_roundeven(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_roundDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_ROUND:%.*]] = tail call <4 x float> @llvm.round.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_ROUND]]
+//
+float4 strict_elementwise_round(float4 a) {
+  return __builtin_elementwise_round(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_rintDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_RINT:%.*]] = tail call <4 x float> @llvm.rint.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_RINT]]
+//
+float4 strict_elementwise_rint(float4 a) {
+  return __builtin_elementwise_rint(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z28strict_elementwise_nearbyintDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_NEARBYINT:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_NEARBYINT]]
+//
+float4 strict_elementwise_nearbyint(float4 a) {
+  return __builtin_elementwise_nearbyint(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_sinDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_SIN:%.*]] = tail call <4 x float> @llvm.sin.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_SIN]]
+//
+float4 strict_elementwise_sin(float4 a) {
+  return __builtin_elementwise_sin(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_TRUNC:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_TRUNC]]
+//
+float4 strict_elementwise_trunc(float4 a) {
+  return __builtin_elementwise_trunc(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z31strict_elementwise_canonicalizeDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_CANONICALIZE]]
+//
+float4 strict_elementwise_canonicalize(float4 a) {
+  return __builtin_elementwise_canonicalize(a);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z27strict_elementwise_copysignDv4_fS_
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float4 strict_elementwise_copysign(float4 a, float4 b) {
+  return __builtin_elementwise_copysign(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_fmaDv4_fS_S_
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> [[A]], <4 x float> [[B]], <4 x float> [[C]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float4 strict_elementwise_fma(float4 a, float4 b, float4 c) {
+  return __builtin_elementwise_fma(a, b, c);
+}
+

diff  --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 030c03eccd646..c23367df110ce 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -460,7 +460,6 @@ void test_builtin_elementwise_roundeven(int i, float f, double d, float4 v, int3
 }
 
 void test_builtin_elementwise_round(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
-
   struct Foo s = __builtin_elementwise_round(f);
   // expected-error at -1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
 
@@ -485,6 +484,56 @@ void test_builtin_elementwise_round(int i, float f, double d, float4 v, int3 iv,
   // expected-error at -1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
 }
 
+void test_builtin_elementwise_rint(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+  struct Foo s = __builtin_elementwise_rint(f);
+  // expected-error at -1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_rint();
+  // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_rint(i);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_rint(f, f);
+  // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_rint(u);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_rint(uv);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+
+  // FIXME: Error should not mention integer
+  _Complex float c1, c2;
+  c1 = __builtin_elementwise_rint(c1);
+  // expected-error at -1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
+}
+
+void test_builtin_elementwise_nearbyint(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+  struct Foo s = __builtin_elementwise_nearbyint(f);
+  // expected-error at -1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_nearbyint();
+  // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_nearbyint(i);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_nearbyint(f, f);
+  // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_nearbyint(u);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_nearbyint(uv);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+
+  // FIXME: Error should not mention integer
+  _Complex float c1, c2;
+  c1 = __builtin_elementwise_nearbyint(c1);
+  // expected-error at -1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
+}
+
 void test_builtin_elementwise_sin(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
   struct Foo s = __builtin_elementwise_sin(f);

diff  --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
index 78cfcacb6ab58..4895c111584ce 100644
--- a/clang/test/SemaCXX/builtins-elementwise-math.cpp
+++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -108,3 +108,17 @@ void test_builtin_elementwise_log2() {
   static_assert(!is_const<decltype(__builtin_elementwise_log2(a))>::value);
   static_assert(!is_const<decltype(__builtin_elementwise_log2(b))>::value);
 }
+
+void test_builtin_elementwise_rint() {
+  const float a = 42.5;
+  float b = 42.3;
+  static_assert(!is_const<decltype(__builtin_elementwise_rint(a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_rint(b))>::value);
+}
+
+void test_builtin_elementwise_nearbyint() {
+  const float a = 42.5;
+  float b = 42.3;
+  static_assert(!is_const<decltype(__builtin_elementwise_nearbyint(a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_nearbyint(b))>::value);
+}