[llvm] de7a6ae - [InstCombine] Optimize shl+lshr+and conversion pattern
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 9 18:58:17 PDT 2022
Author: chenglin.bi
Date: 2022-06-10T09:36:58+08:00
New Revision: de7a6ae1ffc36baa16188d3dd5befa385d05700d
URL: https://github.com/llvm/llvm-project/commit/de7a6ae1ffc36baa16188d3dd5befa385d05700d
DIFF: https://github.com/llvm/llvm-project/commit/de7a6ae1ffc36baa16188d3dd5befa385d05700d.diff
LOG: [InstCombine] Optimize shl+lshr+and conversion pattern
if `C1` and `C3` are pow2 and `Log2(C3)+C2 < BitWidth`:
((C1 << X) >> C2) & C3 -> X == (Log2(C3)+C2-Log2(C1)) ? C3 : 0;
https://alive2.llvm.org/ce/z/Pus5bd
Fix issue https://github.com/llvm/llvm-project/issues/55739
Reviewed By: spatel
Differential Revision: https://reviews.llvm.org/D126617
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/test/Transforms/InstCombine/and.ll
llvm/test/Transforms/InstCombine/icmp-and-shift.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 01097a0fd0c11..d04cdd8e1ec26 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1904,6 +1904,30 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return new ZExtInst(NewBO, Ty);
}
}
+
+ Constant *C1, *C2;
+ const APInt *C3 = C;
+ Value *X;
+ if (C3->isPowerOf2() &&
+ match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)),
+ m_ImmConstant(C2)))) &&
+ match(C1, m_Power2())) {
+ Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1);
+ Constant *Log2C3 = ConstantInt::get(Ty, C3->countTrailingZeros());
+ Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3);
+ KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr);
+ if (KnownLShrc.getMaxValue().ult(Width)) {
+ // iff C1 and C3 are pow2 and C2 + cttz(C3) < BitWidth:
+ // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0
+ Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1);
+ Value *Cmp = Builder.CreateICmpEQ(X, CmpC);
+ return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3),
+ ConstantInt::getNullValue(Ty));
+ }
+ // TODO: Symmetrical case
+ // iff C1 and C3 are pow2 and Log2(C3) >= C2:
+ // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0
+ }
}
if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index c06074c79fc52..2a66b5a528cc2 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1778,9 +1778,8 @@ define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) {
define i16 @shl_lshr_pow2_const_case1(i16 %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1(
-; CHECK-NEXT: [[SHL:%.*]] = shl i16 4, [[X:%.*]]
-; CHECK-NEXT: [[LSHR:%.*]] = lshr i16 [[SHL]], 6
-; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0
; CHECK-NEXT: ret i16 [[R]]
;
%shl = shl i16 4, %x
@@ -1791,9 +1790,8 @@ define i16 @shl_lshr_pow2_const_case1(i16 %x) {
define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1_uniform_vec(
-; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> <i16 4, i16 4, i16 4>, [[X:%.*]]
-; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 6, i16 6, i16 6>
-; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 7, i16 7, i16 7>
+; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
; CHECK-NEXT: ret <3 x i16> [[R]]
;
%shl = shl <3 x i16> <i16 4, i16 4, i16 4>, %x
@@ -1804,22 +1802,20 @@ define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) {
define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec(<3 x i16> %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1_non_uniform_vec(
-; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> <i16 16, i16 8, i16 4>, [[X:%.*]]
-; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 4, i16 3>
-; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 16, i16 4>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 7, i16 6, i16 1>
+; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
; CHECK-NEXT: ret <3 x i16> [[R]]
;
- %shl = shl <3 x i16> <i16 16, i16 8, i16 4>, %x
- %lshr = lshr <3 x i16> %shl, <i16 5, i16 4, i16 3>
- %r = and <3 x i16> %lshr, <i16 8, i16 16, i16 4>
+ %shl = shl <3 x i16> <i16 2, i16 8, i16 32>, %x
+ %lshr = lshr <3 x i16> %shl, <i16 5, i16 6, i16 3>
+ %r = and <3 x i16> %lshr, <i16 8, i16 8, i16 8>
ret <3 x i16> %r
}
define <3 x i16> @shl_lshr_pow2_const_case1_undef1_vec(<3 x i16> %x) {
; CHECK-LABEL: @shl_lshr_pow2_const_case1_undef1_vec(
-; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> <i16 undef, i16 16, i16 16>, [[X:%.*]]
-; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 5, i16 5>
-; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 8, i16 4, i16 4>
+; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
; CHECK-NEXT: ret <3 x i16> [[R]]
;
%shl = shl <3 x i16> <i16 undef, i16 16, i16 16>, %x
@@ -1868,6 +1864,8 @@ define i16 @shl_lshr_pow2_const_case2(i16 %x) {
ret i16 %r
}
+; TODO: this pattern can be transformed to icmp+select
+
define i16 @shl_lshr_pow2_not_const_case2(i16 %x) {
; CHECK-LABEL: @shl_lshr_pow2_not_const_case2(
; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
index 9c9f1a99e82dd..87b594231daf4 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
@@ -57,11 +57,9 @@ define <2 x i32> @icmp_ne_and_pow2_shl1_vec(<2 x i32> %0) {
define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) {
; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%shl = shl i32 2, %0
%and = and i32 %shl, 16
@@ -72,11 +70,9 @@ define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) {
define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) {
; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 4, i32 4>, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], <i32 4, i32 4>
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], <i32 1, i32 1>
-; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%shl = shl <2 x i32> <i32 4, i32 4>, %0
%and = and <2 x i32> %shl, <i32 16, i32 16>
@@ -87,9 +83,8 @@ define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) {
define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) {
; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[SHL]], 4
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0:%.*]], 3
+; CHECK-NEXT: [[AND_LOBIT:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[AND_LOBIT]]
;
%shl = shl i32 2, %0
@@ -101,9 +96,8 @@ define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) {
define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) {
; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 4, i32 4>, [[TMP0:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[SHL]], <i32 4, i32 4>
-; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: [[AND_LOBIT:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]]
;
%shl = shl <2 x i32> <i32 4, i32 4>, %0
More information about the llvm-commits
mailing list