[llvm] r337190 - [InstCombine] Fold 'check for [no] signed truncation' pattern
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 16 09:45:42 PDT 2018
Author: lebedevri
Date: Mon Jul 16 09:45:42 2018
New Revision: 337190
URL: http://llvm.org/viewvc/llvm-project?rev=337190&view=rev
Log:
[InstCombine] Fold 'check for [no] signed truncation' pattern
Summary:
[[ https://bugs.llvm.org/show_bug.cgi?id=38149 | PR38149 ]]
As discussed in https://reviews.llvm.org/D49179#1158957 and later,
the IR for 'check for [no] signed truncation' pattern can be improved:
https://rise4fun.com/Alive/gBf
^ that pattern will be produced by the Implicit Integer Truncation sanitizer,
https://reviews.llvm.org/D48958 https://bugs.llvm.org/show_bug.cgi?id=21530
in the signed case, therefore it is probably a good idea to improve it.
Proofs for this transform: https://rise4fun.com/Alive/mgu
This transform is surprisingly frustrating.
This does not deal with non-splat shift amounts, or with undef shift amounts.
I've outlined what I think the solution should be:
```
// Potential handling of non-splats: for each element:
// * if both are undef, replace with constant 0.
// Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
// * if both are not undef, and are different, bailout.
// * else, only one is undef, then pick the non-undef one.
```
The DAGCombine will reverse this transform, see
https://reviews.llvm.org/D49266
Reviewers: spatel, craig.topper
Reviewed By: spatel
Subscribers: JDevlieghere, rkruppe, llvm-commits
Differential Revision: https://reviews.llvm.org/D49320
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
llvm/trunk/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=337190&r1=337189&r2=337190&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Mon Jul 16 09:45:42 2018
@@ -2945,6 +2945,72 @@ static Value *foldICmpWithLowBitMaskedVa
return Builder.CreateICmp(DstPred, X, M);
}
+/// Some comparisons can be simplified.
+/// In this case, we are looking for comparisons that look like
+/// a check for a lossy signed truncation.
+/// Folds:   (MaskedBits is a constant.)
+///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
+/// Into:
+///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
+/// Where KeptBits = bitwidth(%x) - MaskedBits
+///
+/// Only the 'eq' and 'ne' source predicates are handled; they map to
+/// 'ult' and 'uge' respectively.
+///
+/// \param I        the comparison to inspect; it is matched commutatively.
+/// \param Builder  used to create the replacement 'add' and 'icmp'.
+/// \returns the newly created comparison, or nullptr if the pattern does not
+///          match (different shift amounts, out-of-range shift amount,
+///          multi-use 'ashr', or an unsupported predicate).
+static Value *
+foldICmpWithTruncSignExtendedVal(ICmpInst &I,
+                                 InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate SrcPred;
+  Value *X;
+  const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
+  // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
+  if (!match(&I, m_c_ICmp(SrcPred,
+                          m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
+                                          m_APInt(C1))),
+                          m_Deferred(X))))
+    return nullptr;
+
+  // Potential handling of non-splats: for each element:
+  //  * if both are undef, replace with constant 0.
+  //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
+  //  * if both are not undef, and are different, bailout.
+  //  * else, only one is undef, then pick the non-undef one.
+
+  // The shift amount must be equal.
+  if (*C0 != *C1)
+    return nullptr;
+
+  // A shift amount that is not strictly less than the bit width leaves no
+  // meaningful "kept" bits (and would make the subtraction below underflow),
+  // so do not attempt the fold. This also guarantees the amount fits in
+  // 'unsigned' before it is extracted.
+  const unsigned XBitWidth = C0->getBitWidth();
+  if (C0->uge(XBitWidth))
+    return nullptr;
+  const unsigned MaskedBits = static_cast<unsigned>(C0->getZExtValue());
+  assert(MaskedBits && "shift of %x by zero should be folded to %x already.");
+
+  ICmpInst::Predicate DstPred;
+  switch (SrcPred) {
+  case ICmpInst::Predicate::ICMP_EQ:
+    // ((%x << MaskedBits) a>> MaskedBits) == %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_ULT;
+    break;
+  case ICmpInst::Predicate::ICMP_NE:
+    // ((%x << MaskedBits) a>> MaskedBits) != %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_UGE;
+    break;
+  // FIXME: are more folds possible?
+  default:
+    return nullptr;
+  }
+
+  // Build the constants as APInt at the width of %x rather than in a host
+  // integer: '1UL << KeptBits' is only 32 bits wide on LLP64 hosts and is
+  // undefined once KeptBits reaches the width of 'unsigned long', which
+  // happens for wide types such as i128.
+  const unsigned KeptBits = XBitWidth - MaskedBits;
+  // (1 << KeptBits)
+  const APInt ICmpCst = APInt::getOneBitSet(XBitWidth, KeptBits);
+  // (1 << (KeptBits-1))
+  const APInt AddCst = ICmpCst.lshr(1);
+
+  auto *XType = X->getType();
+  // (add %x, (1 << (KeptBits-1)))
+  Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
+  // (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
+  Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
+
+  return T1;
+}
+
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3285,6 +3351,9 @@ Instruction *InstCombiner::foldICmpBinOp
if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
return replaceInstUsesWith(I, V);
+ if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
+ return replaceInstUsesWith(I, V);
+
return nullptr;
}
Modified: llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll?rev=337190&r1=337189&r2=337190&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll Mon Jul 16 09:45:42 2018
@@ -15,9 +15,8 @@
define i1 @p0(i8 %x) {
; CHECK-LABEL: @p0(
-; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 4
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 16
; CHECK-NEXT: ret i1 [[TMP2]]
;
%tmp0 = shl i8 %x, 4
@@ -32,9 +31,8 @@ define i1 @p0(i8 %x) {
define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 4, i8 4>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 8, i8 8>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 16, i8 16>
; CHECK-NEXT: ret <2 x i1> [[TMP2]]
;
%tmp0 = shl <2 x i8> %x, <i8 4, i8 4>
@@ -104,9 +102,8 @@ declare i8 @gen8()
define i1 @c0() {
; CHECK-LABEL: @c0(
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X]], 4
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[X]], [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 16
; CHECK-NEXT: ret i1 [[TMP2]]
;
%x = call i8 @gen8()
@@ -126,8 +123,8 @@ define i1 @n_oneuse0(i8 %x) {
; CHECK-LABEL: @n_oneuse0(
; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 4
; CHECK-NEXT: call void @use8(i8 [[TMP0]])
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 16
; CHECK-NEXT: ret i1 [[TMP2]]
;
%tmp0 = shl i8 %x, 4
Modified: llvm/trunk/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll?rev=337190&r1=337189&r2=337190&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll Mon Jul 16 09:45:42 2018
@@ -15,9 +15,8 @@
define i1 @p0(i8 %x) {
; CHECK-LABEL: @p0(
-; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 4
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 15
; CHECK-NEXT: ret i1 [[TMP2]]
;
%tmp0 = shl i8 %x, 4
@@ -32,9 +31,8 @@ define i1 @p0(i8 %x) {
define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 4, i8 4>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 8, i8 8>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i8> [[TMP1]], <i8 15, i8 15>
; CHECK-NEXT: ret <2 x i1> [[TMP2]]
;
%tmp0 = shl <2 x i8> %x, <i8 4, i8 4>
@@ -104,9 +102,8 @@ declare i8 @gen8()
define i1 @c0() {
; CHECK-LABEL: @c0(
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X]], 4
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[X]], [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 15
; CHECK-NEXT: ret i1 [[TMP2]]
;
%x = call i8 @gen8()
@@ -126,8 +123,8 @@ define i1 @n_oneuse0(i8 %x) {
; CHECK-LABEL: @n_oneuse0(
; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 4
; CHECK-NEXT: call void @use8(i8 [[TMP0]])
-; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 15
; CHECK-NEXT: ret i1 [[TMP2]]
;
%tmp0 = shl i8 %x, 4
More information about the llvm-commits
mailing list