[llvm] r364738 - [InstCombine] Omit 'urem' where possible
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 02:41:43 PDT 2019
Author: lebedevri
Date: Mon Jul 1 02:41:43 2019
New Revision: 364738
URL: http://llvm.org/viewvc/llvm-project?rev=364738&view=rev
Log:
[InstCombine] Omit 'urem' where possible
This fold was added to the backend in D63390 / rL364286,
but it makes sense to also handle it in the middle-end.
https://rise4fun.com/Alive/Zsln
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=364738&r1=364737&r2=364738&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Mon Jul 1 02:41:43 2019
@@ -1317,14 +1317,16 @@ static Instruction *processUGT_ADDCST_AD
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
-// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+// Handle icmp pred X, 0
Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
CmpInst::Predicate Pred = Cmp.getPredicate();
- Value *X = Cmp.getOperand(0);
+ if (!match(Cmp.getOperand(1), m_Zero()))
+ return nullptr;
- if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
+ // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+ if (Pred == ICmpInst::ICMP_SGT) {
Value *A, *B;
- SelectPatternResult SPR = matchSelectPattern(X, A, B);
+ SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
if (SPR.Flavor == SPF_SMIN) {
if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, B, Cmp.getOperand(1));
@@ -1332,6 +1334,20 @@ Instruction *InstCombiner::foldICmpWithZ
return new ICmpInst(Pred, A, Cmp.getOperand(1));
}
}
+
+ // Given:
+ // icmp eq/ne (urem %x, %y), 0
+ // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+ // icmp eq/ne %x, 0
+ Value *X, *Y;
+ if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
+ ICmpInst::isEquality(Pred)) {
+ KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
+ KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
+ if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+ return new ICmpInst(Pred, X, Cmp.getOperand(1));
+ }
+
return nullptr;
}
Modified: llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll?rev=364738&r1=364737&r2=364738&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll Mon Jul 1 02:41:43 2019
@@ -14,8 +14,7 @@
define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) {
; CHECK-LABEL: @p0_scalar_urem_by_const(
; CHECK-NEXT: [[T0:%.*]] = and i32 [[X:%.*]], 128
-; CHECK-NEXT: [[T1:%.*]] = urem i32 [[T0]], 6
-; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T0]], 0
; CHECK-NEXT: ret i1 [[T2]]
;
%t0 = and i32 %x, 128 ; clearly a power-of-two or zero
@@ -27,9 +26,7 @@ define i1 @p0_scalar_urem_by_const(i32 %
define i1 @p1_scalar_urem_by_nonconst(i32 %x, i32 %y) {
; CHECK-LABEL: @p1_scalar_urem_by_nonconst(
; CHECK-NEXT: [[T0:%.*]] = and i32 [[X:%.*]], 128
-; CHECK-NEXT: [[T1:%.*]] = or i32 [[Y:%.*]], 6
-; CHECK-NEXT: [[T2:%.*]] = urem i32 [[T0]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0
+; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T0]], 0
; CHECK-NEXT: ret i1 [[T3]]
;
%t0 = and i32 %x, 128 ; clearly a power-of-two or zero
@@ -76,8 +73,7 @@ define i1 @p3_scalar_shifted2_urem_by_co
define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @p4_vector_urem_by_const__splat(
; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 128, i32 128>
-; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> [[T0]], <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T1]], zeroinitializer
+; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], zeroinitializer
; CHECK-NEXT: ret <4 x i1> [[T2]]
;
%t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
@@ -115,8 +111,7 @@ define <4 x i1> @p6_vector_urem_by_const
define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_undef2(
; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 128, i32 128>
-; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> [[T0]], <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T1]], <i32 0, i32 0, i32 undef, i32 0>
+; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], <i32 0, i32 0, i32 undef, i32 0>
; CHECK-NEXT: ret <4 x i1> [[T2]]
;
%t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
More information about the llvm-commits
mailing list