[llvm] bfd5121 - [InstCombine] improve analysis of FP->int->FP to eliminate fpextend

Sun May 17 06:07:20 PDT 2020

Author: Sanjay Patel
Date: 2020-05-17T09:06:57-04:00
New Revision: bfd512160fe091bdd45199e5db884a24cd9d5f59

URL: https://github.com/llvm/llvm-project/commit/bfd512160fe091bdd45199e5db884a24cd9d5f59
DIFF: https://github.com/llvm/llvm-project/commit/bfd512160fe091bdd45199e5db884a24cd9d5f59.diff

LOG: [InstCombine] improve analysis of FP->int->FP to eliminate fpextend

This was originally in D79116.
Converting a narrow-enough FP source value to integer and back to
FP guarantees that the int-to-FP conversion is exact: an FP-to-int
cast that overflows yields UB/poison, so the intermediate integer
width does not limit the analysis.

This was suggested in PR36617:
https://bugs.llvm.org/show_bug.cgi?id=36617#c19

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/fpextend.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 16d94d6946b1..fa8e9e6d9f26 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1749,9 +1749,28 @@ static bool isKnownExactCastIntToFP(CastInst &I) {
 
   // Easy case - if the source integer type has less bits than the FP mantissa,
   // then the cast must be exact.
-  if (SrcSize <= FPTy->getFPMantissaWidth())
+  int DestNumSigBits = FPTy->getFPMantissaWidth();
+  if (SrcSize <= DestNumSigBits)
     return true;
 
+  // Cast from FP to integer and back to FP is independent of the intermediate
+  // integer width because of poison on overflow.
+  Value *F;
+  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
+    // If this is uitofp (fptosi F), the source needs an extra bit to avoid
+    // potential rounding of negative FP input values.
+    int SrcNumSigBits = F->getType()->getFPMantissaWidth();
+    if (!IsSigned && match(Src, m_FPToSI(m_Value())))
+      SrcNumSigBits++;
+
+    // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
+    // significant bits than the destination (and make sure neither type is
+    // weird -- ppc_fp128).
+    if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
+        SrcNumSigBits <= DestNumSigBits)
+      return true;
+  }
+
   // TODO:
   // Try harder to find if the source integer type has less significant bits.
   return false;

diff  --git a/llvm/test/Transforms/InstCombine/fpextend.ll b/llvm/test/Transforms/InstCombine/fpextend.ll
index 1b6afd3216d7..9fe85e983fb7 100644
--- a/llvm/test/Transforms/InstCombine/fpextend.ll
+++ b/llvm/test/Transforms/InstCombine/fpextend.ll
@@ -309,11 +309,12 @@ define double @ItoFtoF_u25_f32_f64(i25 %i) {
   ret double %r
 }
 
+; UB on overflow guarantees that the input is small enough to fit in i32.
+
 define double @FtoItoFtoF_f32_s32_f32_f64(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_s32_f32_f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi float [[F:%.*]] to i32
-; CHECK-NEXT:    [[X:%.*]] = sitofp i32 [[I]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[X]] to double
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[I]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %i = fptosi float %f to i32
@@ -333,7 +334,7 @@ define double @FtoItoFtoF_f32_u32_f32_f64_extra_uses(float %f) {
 ; CHECK-NEXT:    call void @use_i32(i32 [[I]])
 ; CHECK-NEXT:    [[X:%.*]] = uitofp i32 [[I]] to float
 ; CHECK-NEXT:    call void @use_f32(float [[X]])
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[X]] to double
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[I]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %i = fptoui float %f to i32
@@ -349,8 +350,7 @@ define double @FtoItoFtoF_f32_u32_f32_f64_extra_uses(float %f) {
 define <3 x double> @FtoItoFtoF_v3f16_v3s32_v3f32_v3f64(<3 x half> %f) {
 ; CHECK-LABEL: @FtoItoFtoF_v3f16_v3s32_v3f32_v3f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi <3 x half> [[F:%.*]] to <3 x i32>
-; CHECK-NEXT:    [[X:%.*]] = sitofp <3 x i32> [[I]] to <3 x float>
-; CHECK-NEXT:    [[R:%.*]] = fpext <3 x float> [[X]] to <3 x double>
+; CHECK-NEXT:    [[R:%.*]] = sitofp <3 x i32> [[I]] to <3 x double>
 ; CHECK-NEXT:    ret <3 x double> [[R]]
 ;
   %i = fptosi <3 x half> %f to <3 x i32>
@@ -359,11 +359,12 @@ define <3 x double> @FtoItoFtoF_v3f16_v3s32_v3f32_v3f64(<3 x half> %f) {
   ret <3 x double> %r
 }
 
+; Wider than double is ok.
+
 define fp128 @FtoItoFtoF_f32_s64_f64_f128(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_s64_f64_f128(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi float [[F:%.*]] to i64
-; CHECK-NEXT:    [[X:%.*]] = sitofp i64 [[I]] to double
-; CHECK-NEXT:    [[R:%.*]] = fpext double [[X]] to fp128
+; CHECK-NEXT:    [[R:%.*]] = sitofp i64 [[I]] to fp128
 ; CHECK-NEXT:    ret fp128 [[R]]
 ;
   %i = fptosi float %f to i64
@@ -372,11 +373,12 @@ define fp128 @FtoItoFtoF_f32_s64_f64_f128(float %f) {
   ret fp128 %r
 }
 
+; Target-specific type is ok.
+
 define x86_fp80 @FtoItoFtoF_f64_u54_f64_f80(double %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f64_u54_f64_f80(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui double [[F:%.*]] to i54
-; CHECK-NEXT:    [[X:%.*]] = uitofp i54 [[I]] to double
-; CHECK-NEXT:    [[R:%.*]] = fpext double [[X]] to x86_fp80
+; CHECK-NEXT:    [[R:%.*]] = uitofp i54 [[I]] to x86_fp80
 ; CHECK-NEXT:    ret x86_fp80 [[R]]
 ;
   %i = fptoui double %f to i54
@@ -385,11 +387,12 @@ define x86_fp80 @FtoItoFtoF_f64_u54_f64_f80(double %f) {
   ret x86_fp80 %r
 }
 
+; Weird target-specific type is ok (not possible to extend *from* that type).
+
 define ppc_fp128 @FtoItoFtoF_f64_u54_f64_p128(double %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f64_u54_f64_p128(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui double [[F:%.*]] to i54
-; CHECK-NEXT:    [[X:%.*]] = uitofp i54 [[I]] to double
-; CHECK-NEXT:    [[R:%.*]] = fpext double [[X]] to ppc_fp128
+; CHECK-NEXT:    [[R:%.*]] = uitofp i54 [[I]] to ppc_fp128
 ; CHECK-NEXT:    ret ppc_fp128 [[R]]
 ;
   %i = fptoui double %f to i54
@@ -398,11 +401,12 @@ define ppc_fp128 @FtoItoFtoF_f64_u54_f64_p128(double %f) {
   ret ppc_fp128 %r
 }
 
+; Unsigned to signed is ok because signed int has smaller magnitude.
+
 define double @FtoItoFtoF_f32_us32_f32_f64(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_us32_f32_f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui float [[F:%.*]] to i32
-; CHECK-NEXT:    [[X:%.*]] = sitofp i32 [[I]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[X]] to double
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[I]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %i = fptoui float %f to i32
@@ -411,6 +415,8 @@ define double @FtoItoFtoF_f32_us32_f32_f64(float %f) {
   ret double %r
 }
 
+; Negative test: consider -1.0
+
 define double @FtoItoFtoF_f32_su32_f32_f64(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_su32_f32_f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi float [[F:%.*]] to i32