[PATCH] D45862: [InstCombine] Relax restriction in foldSelectInstWithICmp for sake of smaller code size
Max Kazantsev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 19 22:16:01 PDT 2018
mkazantsev created this revision.
mkazantsev added reviewers: craig.topper, lebedev.ri, spatel, reames.
Currently, when we process the pattern like
%cmp = icmp slt i64 %x, 0
select i1 %cmp, i32 -1, 32 1
We handle it in a general case with power of two being in the 3rd pard of select, and
the generated code for this case looks like:
%1 = lshr i64 %x, 62
%2 = trunc i64 %1 to i32
%3 = and i32 %2, 2
%4 = xor i32 %3, 2
%5 = add nsw i32 %4, -1
However we have a smarter logic specifically with comparison with zero in foldSelectInstWithICmp
which did not apply because of type mismatch between `%x` and `1`. If we lift the restriction and
handle this case separately, we end up with a better pattern:
%1 = ashr i64 %x, 63
%2 = trunc i64 %1 to i32
%3 = and i32 %2, -24
%4 = or i32 %3, 19
This is 1 instruction less than what we have now.
This patch relaxes the restricton on types, allowing now `%x` be wider than the result of select.
We can in theory also allow `1` to be wider (no functional problems with that), however when I
did, in some cases we end up with a worse code (see the comment). The problem looks irrelevant
to this change which is a pure improvement. We can yield the last restriction when other problematic
transforms are fixed.
https://reviews.llvm.org/D45862
Files:
lib/Transforms/InstCombine/InstCombineSelect.cpp
test/Transforms/InstCombine/select-of-bittest.ll
Index: test/Transforms/InstCombine/select-of-bittest.ll
===================================================================
--- test/Transforms/InstCombine/select-of-bittest.ll
+++ test/Transforms/InstCombine/select-of-bittest.ll
@@ -656,3 +656,44 @@
%tmp4 = select i1 %tmp1, i32 %tmp3, i32 1
ret i32 %tmp4
}
+
+define i32 @compare_to_zero_mismatched_types_idiomatic_trunc(i64 %x) {
+
+; CHECK-LABEL: compare_to_zero_mismatched_types_idiomatic_trunc(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 %x, 63
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[ASHR]] to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TRUNC]], 1
+; CHECK-NEXT: ret i32 [[OR]]
+
+ %cmp = icmp slt i64 %x, 0
+ %select1 = select i1 %cmp, i32 -1, i32 1
+ ret i32 %select1
+}
+
+define i32 @compare_to_zero_mismatched_types_non_idiomatic_trunc_01(i64 %x) {
+
+; CHECK-LABEL: compare_to_zero_mismatched_types_non_idiomatic_trunc_01(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 %x, 63
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[ASHR]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TRUNC]], -24
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], 19
+; CHECK-NEXT: ret i32 [[OR]]
+
+ %cmp = icmp slt i64 %x, 0
+ %select1 = select i1 %cmp, i32 -5, i32 19
+ ret i32 %select1
+}
+
+define i32 @compare_to_zero_mismatched_types_non_idiomatic_trunc_02(i64 %x) {
+
+; CHECK-LABEL: compare_to_zero_mismatched_types_non_idiomatic_trunc_02(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 %x, 63
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[ASHR]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TRUNC]], -25
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[AND]], 20
+; CHECK-NEXT: ret i32 [[ADD]]
+
+ %cmp = icmp slt i64 %x, 0
+ %select1 = select i1 %cmp, i32 -5, i32 20
+ ret i32 %select1
+}
Index: lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -797,12 +797,17 @@
// Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
// and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
- // FIXME: Type and constness constraints could be lifted, but we have to
- // watch code size carefully. We should consider xor instead of
- // sub/add when we decide to do that.
+ // FIXME: Case when CmpLHS is more narrow than TrueVal is also valid, but
+ // it seems that with some other transforms it may end up with worse
+ // IR (containing both trunc and sext where we could have none). It
+ // looks like a problem in some other transforms (see test71 in
+ // select-with-bitwise-ops.ll). Lift this restriction once they are
+ // fixed.
// TODO: Merge this with foldSelectICmpAnd somehow.
if (CmpLHS->getType()->isIntOrIntVectorTy() &&
- CmpLHS->getType() == TrueVal->getType()) {
+ TrueVal->getType()->isIntOrIntVectorTy() &&
+ CmpLHS->getType()->getScalarSizeInBits() >=
+ TrueVal->getType()->getScalarSizeInBits()) {
const APInt *C1, *C2;
if (match(TrueVal, m_APInt(C1)) && match(FalseVal, m_APInt(C2))) {
ICmpInst::Predicate Pred = ICI->getPredicate();
@@ -818,14 +823,19 @@
// This shift results in either -1 or 0.
Value *AShr = Builder.CreateAShr(X, Mask.getBitWidth() - 1);
+ // Convert to the destination type if needed.
+ Value *AShrTyped = AShr;
+ if (CmpLHS->getType() != TrueVal->getType())
+ AShrTyped = Builder.CreateSExtOrTrunc(AShr, TrueVal->getType());
// Check if we can express the operation with a single or.
if (C2->isAllOnesValue())
- return replaceInstUsesWith(SI, Builder.CreateOr(AShr, *C1));
+ return replaceInstUsesWith(SI, Builder.CreateOr(AShrTyped, *C1));
- Value *And = Builder.CreateAnd(AShr, *C2 - *C1);
- return replaceInstUsesWith(SI, Builder.CreateAdd(And,
- ConstantInt::get(And->getType(), *C1)));
+ Value *And = Builder.CreateAnd(AShrTyped, *C2 - *C1);
+ return replaceInstUsesWith(
+ SI,
+ Builder.CreateAdd(And, ConstantInt::get(And->getType(), *C1)));
}
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45862.143226.patch
Type: text/x-patch
Size: 4300 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180420/669b9095/attachment.bin>
More information about the llvm-commits
mailing list