[PATCH] D79369: [InstCombine] "X - (X / C) * C == 0" to "X & C-1 == 0"
Egor Bogatov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 4 15:05:31 PDT 2020
EgorBo created this revision.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.
EgorBo added reviewers: lebedev.ri, spatel.
EgorBo edited the summary of this revision.
"X % C == 0" is optimized to "X & C-1 == 0" (where C is a power-of-two)
However, "X % C" can also be written as "X - (X / C) * C". When the comparison is rewritten in that decomposed form,
"X - (X / C) * C == 0", it is not currently optimized to "X & C-1 == 0"; see godbolt: https://godbolt.org/z/KzuXUj
This is my first contribution to LLVM so I hope I didn't mess things up
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D79369
Files:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/lib/Transforms/InstCombine/InstCombineInternal.h
llvm/test/Transforms/InstCombine/icmp-div-constant.ll
Index: llvm/test/Transforms/InstCombine/icmp-div-constant.ll
===================================================================
--- llvm/test/Transforms/InstCombine/icmp-div-constant.ll
+++ llvm/test/Transforms/InstCombine/icmp-div-constant.ll
@@ -38,6 +38,19 @@
ret i1 %r
}
+define i1 @is_rem32_pos_decomposed_i8(i8 %x) {
+; CHECK-LABEL: @is_rem32_pos_decomposed_i8(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], 31
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %d = sdiv i8 %x, 32
+  %m = mul nsw i8 %d, 32
+  %s = sub nsw i8 %x, %m
+  %r = icmp eq i8 %s, 0
+  ret i1 %r
+}
+
; i16 -32765 == 32771 == 0b1000000000000011
define i1 @is_rem4_neg_i16(i16 %x) {
Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -934,6 +934,7 @@
Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
Instruction *foldICmpEquality(ICmpInst &Cmp);
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
+ Instruction *foldIRemByPowerOfTwoDecomposedToBitTest(ICmpInst &I);
Instruction *foldSignBitTest(ICmpInst &I);
Instruction *foldICmpWithZero(ICmpInst &Cmp);
Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1348,7 +1348,8 @@
Value *X, *Y, *Zero;
if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))),
m_CombineAnd(m_Zero(), m_Value(Zero)))))
- return nullptr;
+ // try to recognize a decomposed version
+ return foldIRemByPowerOfTwoDecomposedToBitTest(I);
if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I))
return nullptr;
// This may increase instruction count, we don't enforce that Y is a constant.
@@ -1357,6 +1358,41 @@
return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero);
}
+/// Fold the decomposed form of "X % C == 0" into "(X & (C - 1)) == 0" for a
+/// power-of-two C. "X - (X / C) * C" is expected to reach this fold as
+/// "((X sdiv -C) << log2(C)) + X", so that is the shape matched here. Only
+/// eq/ne against zero is handled; returns nullptr when nothing matches.
+Instruction *
+InstCombiner::foldIRemByPowerOfTwoDecomposedToBitTest(ICmpInst &I) {
+  // A bit test can only stand in for an equality comparison.
+  if (!I.isEquality())
+    return nullptr;
+
+  ICmpInst::Predicate Pred;
+  Value *X, *Zero;
+  const APInt *C1, *C2;
+  // Require a zero RHS, accept either operand order of the commutative add,
+  // and insist on one use so the new 'and' does not grow the instruction count.
+  if (!match(&I, m_ICmp(Pred,
+                        m_OneUse(m_c_Add(m_Shl(m_SDiv(m_Value(X), m_APInt(C1)),
+                                               m_APInt(C2)),
+                                         m_Deferred(X))),
+                        m_CombineAnd(m_Zero(), m_Value(Zero)))))
+    return nullptr;
+
+  // The divisor must be -C for a power-of-two C and the shift must be log2(C),
+  // i.e. (1 << C2) == |C1|. As before, C2 <= 1 (signed) stays rejected.
+  const APInt AbsC1 = C1->abs();
+  const APInt One(C2->getBitWidth(), 1);
+  if (!C1->isNegative() || !AbsC1.isPowerOf2() || C2->sle(One) ||
+      AbsC1 != One.shl(*C2))
+    return nullptr;
+
+  // Build the mask as an APInt so types wider than 64 bits do not assert.
+  Value *And = Builder.CreateAnd(X, ConstantInt::get(X->getType(), AbsC1 - 1));
+  return new ICmpInst(Pred, And, Zero);
+}
+
/// Fold equality-comparison between zero and any (maybe truncated) right-shift
/// by one-less-than-bitwidth into a sign test on the original value.
Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D79369.261936.patch
Type: text/x-patch
Size: 3565 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200504/e9278c6a/attachment.bin>
More information about the llvm-commits
mailing list