[llvm] 00a6254 - Stop traping on sNaN in __builtin_isnan

Fri Feb 5 10:28:51 PST 2021

Author: Thomas Preud'homme
Date: 2021-02-05T18:28:48Z
New Revision: 00a62547da7e609d1641c107d01d95504883921f

URL: https://github.com/llvm/llvm-project/commit/00a62547da7e609d1641c107d01d95504883921f
DIFF: https://github.com/llvm/llvm-project/commit/00a62547da7e609d1641c107d01d95504883921f.diff

LOG: Stop traping on sNaN in __builtin_isnan

__builtin_isnan currently generates a floating-point compare operation
which triggers a trap when faced with a signaling NaN in StrictFP mode.
This commit uses integer operations instead to not generate any trap in
such a case.

Reviewed By: kpn

Differential Revision: https://reviews.llvm.org/D95948

Added: 
    clang/test/CodeGen/X86/strictfp_builtins.c
    clang/test/CodeGen/aarch64-strictfp-builtins.c
    clang/test/CodeGen/strictfp_builtins.c

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    llvm/include/llvm/ADT/APFloat.h
    llvm/include/llvm/IR/Type.h

Removed: 
    clang/test/CodeGen/strictfp_fpclassify.c


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d8bb1bc84daa..4ff84ce8b79f 100644

--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -26,6 +26,8 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -2985,10 +2987,32 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_isnan: {
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
-    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
     Value *V = EmitScalarExpr(E->getArg(0));
-    V = Builder.CreateFCmpUNO(V, V, "cmp");
-    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
+    llvm::Type *Ty = V->getType();
+    const llvm::fltSemantics &Semantics = Ty->getFltSemantics();
+    if (!Builder.getIsFPConstrained() ||
+        Builder.getDefaultConstrainedExcept() == fp::ebIgnore ||
+        !Ty->isIEEE()) {
+      V = Builder.CreateFCmpUNO(V, V, "cmp");
+      return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
+    }
+
+    // NaN has all exp bits set and a non zero significand. Therefore:
+    // isnan(V) == ((exp mask - (abs(V) & exp mask)) < 0)
+    unsigned bitsize = Ty->getScalarSizeInBits();
+    llvm::IntegerType *IntTy = Builder.getIntNTy(bitsize);
+    Value *IntV = Builder.CreateBitCast(V, IntTy);
+    APInt AndMask = APInt::getSignedMaxValue(bitsize);
+    Value *AbsV =
+        Builder.CreateAnd(IntV, llvm::ConstantInt::get(IntTy, AndMask));
+    APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
+    Value *Sub =
+        Builder.CreateSub(llvm::ConstantInt::get(IntTy, ExpMask), AbsV);
+    // V = sign bit (Sub) <=> V = (Sub < 0)
+    V = Builder.CreateLShr(Sub, llvm::ConstantInt::get(IntTy, bitsize - 1));
+    if (bitsize > 32)
+      V = Builder.CreateTrunc(V, ConvertType(E->getType()));
+    return RValue::get(V);
   }
 
   case Builtin::BI__builtin_matrix_transpose: {

diff  --git a/clang/test/CodeGen/X86/strictfp_builtins.c b/clang/test/CodeGen/X86/strictfp_builtins.c
new file mode 100644
index 000000000000..d7eda34fb45e
--- /dev/null
+++ b/clang/test/CodeGen/X86/strictfp_builtins.c
@@ -0,0 +1,46 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -emit-llvm -ffp-exception-behavior=maytrap -o - -triple x86_64-unknown-unknown | FileCheck %s
+
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+// FIXME: these functions shouldn't trap on SNaN.
+
+#pragma float_control(except, on)
+
+int printf(const char *, ...);
+
+// CHECK-LABEL: @p(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[STR_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[STR:%.*]], i8** [[STR_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[X:%.*]], i32* [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[STR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X_ADDR]], align 4
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]]) [[ATTR4:#.*]]
+// CHECK-NEXT:    ret void
+//
+void p(char *str, int x) {
+  printf("%s: %d\n", str, x);
+}
+
+#define P(n,args) p(#n #args, __builtin_##n args)
+
+// CHECK-LABEL: @test_long_double_isnan(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    store x86_fp80 [[D:%.*]], x86_fp80* [[LD_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast x86_fp80 [[TMP0]] to i80
+// CHECK-NEXT:    [[ABS:%.*]] = and i80 [[BITCAST]], 604462909807314587353087
+// CHECK-NEXT:    [[TMP1:%.*]] = sub i80 604453686435277732577280, [[ABS]]
+// CHECK-NEXT:    [[ISNAN:%.*]] = lshr i80 [[TMP1]], 79
+// CHECK-NEXT:    [[RES:%.*]] = trunc i80 [[ISNAN]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
+// CHECK-NEXT:    ret void
+//
+void test_long_double_isnan(long double ld) {
+  P(isnan, (ld));
+
+  return;
+}

diff  --git a/clang/test/CodeGen/aarch64-strictfp-builtins.c b/clang/test/CodeGen/aarch64-strictfp-builtins.c
new file mode 100644
index 000000000000..14647c31e647
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-strictfp-builtins.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 %s -emit-llvm -ffp-exception-behavior=maytrap -fexperimental-strict-floating-point -o - -triple arm64-none-linux-gnu | FileCheck %s
+
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+
+int printf(const char *, ...);
+
+// CHECK-LABEL: @p(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[STR_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8* [[STR:%.*]], i8** [[STR_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[X:%.*]], i32* [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[STR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X_ADDR]], align 4
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i8* [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+void p(char *str, int x) {
+  printf("%s: %d\n", str, x);
+}
+
+#define P(n,args) p(#n #args, __builtin_##n args)
+
+// CHECK-LABEL: @test_long_double_isnan(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[LD_ADDR:%.*]] = alloca fp128, align 16
+// CHECK-NEXT:    store fp128 [[D:%.*]], fp128* [[LD_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load fp128, fp128* [[LD_ADDR]], align 16
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast fp128 [[TMP0]] to i128
+// CHECK-NEXT:    [[ABS:%.*]] = and i128 [[BITCAST]], 170141183460469231731687303715884105727
+// CHECK-NEXT:    [[TMP1:%.*]] = sub i128 170135991163610696904058773219554885632, [[ABS]]
+// CHECK-NEXT:    [[ISNAN:%.*]] = lshr i128 [[TMP1]], 127
+// CHECK-NEXT:    [[RES:%.*]] = trunc i128 [[ISNAN]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), i32 [[RES]])
+// CHECK-NEXT:    ret void
+//
+void test_long_double_isnan(long double ld) {
+  P(isnan, (ld));
+
+  return;
+}

diff  --git a/clang/test/CodeGen/strictfp_fpclassify.c b/clang/test/CodeGen/strictfp_builtins.c
similarity index 83%
rename from clang/test/CodeGen/strictfp_fpclassify.c
rename to clang/test/CodeGen/strictfp_builtins.c
index 404dba431687..131c9406fab6 100644
--- a/clang/test/CodeGen/strictfp_fpclassify.c
+++ b/clang/test/CodeGen/strictfp_builtins.c
@@ -92,17 +92,38 @@ void test_isinf_sign(double d) {
   return;
 }
 
-// CHECK-LABEL: @test_isnan(
+// CHECK-LABEL: @test_float_isnan(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store float [[F:%.*]], float* [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F_ADDR]], align 4
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[TMP0]] to i32
+// CHECK-NEXT:    [[ABS:%.*]] = and i32 [[BITCAST]], [[#%u,0x7FFFFFFF]]
+// CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[#%u,0x7F800000]], [[ABS]]
+// CHECK-NEXT:    [[ISNAN:%.*]] = lshr i32 [[TMP1]], 31
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.4, i64 0, i64 0), i32 [[ISNAN]]) [[ATTR4]]
+// CHECK-NEXT:    ret void
+//
+void test_float_isnan(float f) {
+  P(isnan, (f));
+
+  return;
+}
+
+// CHECK-LABEL: @test_double_isnan(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[D_ADDR:%.*]] = alloca double, align 8
 // CHECK-NEXT:    store double [[D:%.*]], double* [[D_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[D_ADDR]], align 8
-// CHECK-NEXT:    [[CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP0]], metadata !"uno", metadata !"fpexcept.strict") [[ATTR4]]
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[CMP]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.4, i64 0, i64 0), i32 [[TMP1]]) [[ATTR4]]
+// CHECK-NEXT:    [[BITCAST:%.*]] = bitcast double [[TMP0]] to i64
+// CHECK-NEXT:    [[ABS:%.*]] = and i64 [[BITCAST]], [[#%u,0x7FFFFFFFFFFFFFFF]]
+// CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[#%u,0x7FF0000000000000]], [[ABS]]
+// CHECK-NEXT:    [[ISNAN:%.*]] = lshr i64 [[TMP1]], 63
+// CHECK-NEXT:    [[RES:%.*]] = trunc i64 [[ISNAN]] to i32
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.5, i64 0, i64 0), i32 [[RES]]) [[ATTR4]]
 // CHECK-NEXT:    ret void
 //
-void test_isnan(double d) {
+void test_double_isnan(double d) {
   P(isnan, (d));
 
   return;
@@ -120,7 +141,7 @@ void test_isnan(double d) {
 // CHECK-NEXT:    [[AND:%.*]] = and i1 [[ISEQ]], [[ISINF]]
 // CHECK-NEXT:    [[AND1:%.*]] = and i1 [[AND]], [[ISNORMAL]]
 // CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[AND1]] to i32
-// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.5, i64 0, i64 0), i32 [[TMP2]]) [[ATTR4]]
+// CHECK-NEXT:    call void @p(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.6, i64 0, i64 0), i32 [[TMP2]]) [[ATTR4]]
 // CHECK-NEXT:    ret void
 //
 void test_isnormal(double d) {

diff  --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8aafd525196d..331dcd699c01 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1218,6 +1218,7 @@ class APFloat : public APFloatBase {
   bool isSmallest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isSmallest()); }
   bool isLargest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isLargest()); }
   bool isInteger() const { APFLOAT_DISPATCH_ON_SEMANTICS(isInteger()); }
+  bool isIEEE() const { return usesLayout<IEEEFloat>(getSemantics()); }
 
   APFloat &operator=(const APFloat &RHS) = default;
   APFloat &operator=(APFloat &&RHS) = default;

diff  --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 756c69dd6ae9..2b1f054b1118 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -308,6 +308,10 @@ class Type {
   /// ppc long double), this method returns -1.
   int getFPMantissaWidth() const;
 
+  /// Return whether the type is IEEE compatible, as defined by the eponymous
+  /// method in APFloat.
+  bool isIEEE() const { return APFloat::getZero(getFltSemantics()).isIEEE(); }
+
   /// If this is a vector type, return the element type, otherwise return
   /// 'this'.
   inline Type *getScalarType() const {