[llvm] 404479b - [InstCombine] Use known bits to determine exact int->fp cast

Wed Jun 29 18:48:16 PDT 2022

Author: zhongyunde
Date: 2022-06-30T09:45:11+08:00
New Revision: 404479b4b042ab6c37cce67cea25eaa4d4d7ad43

URL: https://github.com/llvm/llvm-project/commit/404479b4b042ab6c37cce67cea25eaa4d4d7ad43
DIFF: https://github.com/llvm/llvm-project/commit/404479b4b042ab6c37cce67cea25eaa4d4d7ad43.diff

LOG: [InstCombine] Use known bits to determine exact int->fp cast

Reviewed By: spatel, nikic

Differential Revision: https://reviews.llvm.org/D127854

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/fpcast.ll
    llvm/test/Transforms/InstCombine/sitofp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3de8f08e4e048..e9e779b8619b6 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1720,7 +1720,7 @@ static Type *getMinimumFPType(Value *V) {
 
 /// Return true if the cast from integer to FP can be proven to be exact for all
 /// possible inputs (the conversion does not lose any precision).
-static bool isKnownExactCastIntToFP(CastInst &I) {
+static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
   CastInst::CastOps Opcode = I.getOpcode();
   assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
          "Unexpected cast");
@@ -1757,6 +1757,12 @@ static bool isKnownExactCastIntToFP(CastInst &I) {
   // TODO:
   // Try harder to find if the source integer type has less significant bits.
   // For example, compute number of sign bits or compute low bit mask.
+  KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
+  int LowBits =
+      (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
+  if (LowBits <= DestNumSigBits)
+    return true;
+
   return false;
 }
 
@@ -1937,7 +1943,7 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
   Value *Src = FPT.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1951,7 +1957,7 @@ Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
   Value *Src = FPExt.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1978,7 +1984,7 @@ Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
 
   // This means this is also safe for a signed input and unsigned output, since
   // a negative input would lead to undefined behavior.
-  if (!isKnownExactCastIntToFP(*OpI)) {
+  if (!isKnownExactCastIntToFP(*OpI, *this)) {
     // The first cast may not round exactly based on the source integer width
     // and FP width, but the overflow UB rules can still allow this to fold.
     // If the destination type is narrow, that means the intermediate FP value

diff  --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
index f67514c0f427a..3e5c6fd20b12d 100644
--- a/llvm/test/Transforms/InstCombine/fpcast.ll
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -170,8 +170,7 @@ define half @sint_to_fptrunc(i32 %x) {
 define half @masked_sint_to_fptrunc1(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fptrunc1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -183,8 +182,7 @@ define half @masked_sint_to_fptrunc1(i32 %x) {
 define half @masked_sint_to_fptrunc2(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fptrunc2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = lshr i32 %x, 8
@@ -220,8 +218,7 @@ define double @sint_to_fpext(i32 %x) {
 define double @masked_sint_to_fpext1(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fpext1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -233,8 +230,7 @@ define double @masked_sint_to_fpext1(i32 %x) {
 define double @masked_sint_to_fpext2(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fpext2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = lshr i32 %x, 8
@@ -270,8 +266,7 @@ define half @uint_to_fptrunc(i32 %x) {
 define half @masked_uint_to_fptrunc1(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fptrunc1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -283,8 +278,7 @@ define half @masked_uint_to_fptrunc1(i32 %x) {
 define half @masked_uint_to_fptrunc2(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fptrunc2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = lshr i32 %x, 8
@@ -320,8 +314,7 @@ define double @uint_to_fpext(i32 %x) {
 define double @masked_uint_to_fpext1(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fpext1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -333,8 +326,7 @@ define double @masked_uint_to_fpext1(i32 %x) {
 define double @masked_uint_to_fpext2(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fpext2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = lshr i32 %x, 8

diff  --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll
index 2427979260f94..cec3931c0eda1 100644
--- a/llvm/test/Transforms/InstCombine/sitofp.ll
+++ b/llvm/test/Transforms/InstCombine/sitofp.ll
@@ -218,16 +218,38 @@ define i55 @test19(i64 %A) {
   ret i55 %C
 }
 
-; TODO: The mask guarantees that the input is small enough to eliminate the FP casts.
+; The mask guarantees that the input is small enough to eliminate the FP casts.
 
 define i25 @masked_input(i25 %A) {
 ; CHECK-LABEL: @masked_input(
 ; CHECK-NEXT:    [[M:%.*]] = and i25 [[A:%.*]], 65535
+; CHECK-NEXT:    ret i25 [[M]]
+;
+  %m = and i25 %A, 65535
+  %B = uitofp i25 %m to float
+  %C = fptoui float %B to i25
+  ret i25 %C
+}
+
+define i25 @max_masked_input(i25 %A) {
+; CHECK-LABEL: @max_masked_input(
+; CHECK-NEXT:    [[M:%.*]] = and i25 [[A:%.*]], 16777215
+; CHECK-NEXT:    ret i25 [[M]]
+;
+  %m = and i25 %A, 16777215    ; max intermediate 16777215 (= (1 << 24) - 1)
+  %B = uitofp i25 %m to float
+  %C = fptoui float %B to i25
+  ret i25 %C
+}
+
+define i25 @overflow_masked_input(i25 %A) {
+; CHECK-LABEL: @overflow_masked_input(
+; CHECK-NEXT:    [[M:%.*]] = and i25 [[A:%.*]], -16777216
 ; CHECK-NEXT:    [[B:%.*]] = uitofp i25 [[M]] to float
 ; CHECK-NEXT:    [[C:%.*]] = fptoui float [[B]] to i25
 ; CHECK-NEXT:    ret i25 [[C]]
 ;
-  %m = and i25 %A, 65535
+  %m = and i25 %A, 16777216  ; Negative test - intermediate 16777216 (= 1 << 24)
   %B = uitofp i25 %m to float
   %C = fptoui float %B to i25
   ret i25 %C