[llvm] ec40c8f - [ValueTracking] Improve ComputeNumSignBits to handle Trunc

Tue Jan 3 14:26:27 PST 2023

Author: Owen Anderson
Date: 2023-01-03T15:26:21-07:00
New Revision: ec40c8f6fe8477e22b9a1e5a3140dd3f7b247588

URL: https://github.com/llvm/llvm-project/commit/ec40c8f6fe8477e22b9a1e5a3140dd3f7b247588
DIFF: https://github.com/llvm/llvm-project/commit/ec40c8f6fe8477e22b9a1e5a3140dd3f7b247588.diff

LOG: [ValueTracking] Improve ComputeNumSignBits to handle Trunc

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D140796

Added: 
    llvm/test/Transforms/InstCombine/vector-trunc.ll

Modified: 
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
    llvm/test/Transforms/InstCombine/negated-bitmask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index b563154ecd9fe..d12e4c7cdd165 100644

--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3312,10 +3312,17 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
       return Tmp;
     }
 
-    case Instruction::Trunc:
-      // FIXME: it's tricky to do anything useful for this, but it is an
-      // important case for targets like X86.
-      break;
+    case Instruction::Trunc: {
+      // If the input contained enough sign bits that some remain after the
+      // truncation, then we can make use of that. Otherwise we don't know
+      // anything.
+      Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+      unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
+      if (Tmp > (OperandTyBits - TyBits))
+        return Tmp - (OperandTyBits - TyBits);
+
+      return 1;
+    }
 
     case Instruction::ExtractElement:
       // Look through extract element. At the moment we keep this simple and

diff  --git a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
index ec7c3c807d542..e87d90909e84a 100644
--- a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
@@ -138,7 +138,7 @@ define i32 @n10(i64 %x) {
 ; CHECK-LABEL: @n10(
 ; CHECK-NEXT:    [[T0_NEG:%.*]] = ashr i64 [[X:%.*]], 63
 ; CHECK-NEXT:    [[T1_NEG:%.*]] = trunc i64 [[T0_NEG]] to i32
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[T1_NEG]], 1
+; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[T1_NEG]], 1
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %t0 = lshr i64 %x, 63

diff  --git a/llvm/test/Transforms/InstCombine/negated-bitmask.ll b/llvm/test/Transforms/InstCombine/negated-bitmask.ll
index a41a5d9c24af7..fdd8e7e5b262a 100644
--- a/llvm/test/Transforms/InstCombine/negated-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/negated-bitmask.ll
@@ -71,7 +71,7 @@ define i8 @sub_mask1_trunc_lshr(i64 %a0) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[A0:%.*]], 48
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr i64 [[TMP1]], 63
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i8
-; CHECK-NEXT:    [[NEG:%.*]] = add i8 [[TMP3]], 10
+; CHECK-NEXT:    [[NEG:%.*]] = add nsw i8 [[TMP3]], 10
 ; CHECK-NEXT:    ret i8 [[NEG]]
 ;
   %shift = lshr i64 %a0, 15
@@ -86,7 +86,7 @@ define i32 @sub_sext_mask1_trunc_lshr(i64 %a0) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[A0:%.*]], 48
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr i64 [[TMP1]], 63
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i8
-; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[TMP3]], 10
+; CHECK-NEXT:    [[NARROW:%.*]] = add nsw i8 [[TMP3]], 10
 ; CHECK-NEXT:    [[NEG:%.*]] = zext i8 [[NARROW]] to i32
 ; CHECK-NEXT:    ret i32 [[NEG]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/vector-trunc.ll b/llvm/test/Transforms/InstCombine/vector-trunc.ll
new file mode 100644
index 0000000000000..eeb5a3fdb7398
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-trunc.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define <4 x i16> @trunc_add_nsw(<4 x i32> %0) {
+; CHECK-LABEL: @trunc_add_nsw(
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    ret <4 x i16> [[TMP4]]
+;
+  %2 = ashr <4 x i32> %0, <i32 17, i32 17, i32 17, i32 17>
+  %3 = trunc <4 x i32> %2 to <4 x i16>
+  %4 = add <4 x i16> %3, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %4
+}
+
+define <4 x i16> @trunc_add_no_nsw(<4 x i32> %0) {
+; CHECK-LABEL: @trunc_add_no_nsw(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    ret <4 x i16> [[TMP4]]
+;
+  %2 = ashr <4 x i32> %0, <i32 16, i32 16, i32 16, i32 16>
+  %3 = trunc <4 x i32> %2 to <4 x i16>
+  %4 = add <4 x i16> %3, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %4
+}
+
+define <4 x i16> @trunc_add_mixed(<4 x i32> %0) {
+; CHECK-LABEL: @trunc_add_mixed(
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], <i32 17, i32 16, i32 17, i32 16>
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    ret <4 x i16> [[TMP4]]
+;
+  %2 = ashr <4 x i32> %0, <i32 17, i32 16, i32 17, i32 16>
+  %3 = trunc <4 x i32> %2 to <4 x i16>
+  %4 = add <4 x i16> %3, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %4
+}