[llvm] bfb5d2e - [InstCombine] Transform (A > 0) | (A < 0) -> zext (A != 0) fold

Thu Jul 6 00:03:05 PDT 2023

Author: XChy
Date: 2023-07-06T02:02:43-05:00
New Revision: bfb5d2e6f854f16eb4f4d49ce8c0160f89584a00

URL: https://github.com/llvm/llvm-project/commit/bfb5d2e6f854f16eb4f4d49ce8c0160f89584a00
DIFF: https://github.com/llvm/llvm-project/commit/bfb5d2e6f854f16eb4f4d49ce8c0160f89584a00.diff

LOG: [InstCombine] Transform (A > 0) | (A < 0) -> zext (A != 0) fold

[InstCombine] Transform (A > 0) | (A < 0) -> zext (A != 0) fold

This extends **foldCastedBitwiseLogic** to handle the similar cases.

Actually, for `(A > B) | (A < B)`, when B != 0, it can be optimized to `zext( A != B )` by **foldAndOrOfICmpsUsingRanges**.
However, when B = 0, **transformZExtICmp** will transform `zext(A < 0) to i32` into `A << 31`,
which cannot be optimized by **foldAndOrOfICmpsUsingRanges**.

Because I'm new to LLVM and has no concise knowledge about how LLVM decides the order of optimization,
I choose to extend **foldCastedBitwiseLogic** to fold `( A << (X - 1) ) | ((A > 0) zext to iX) -> (A != 0) zext to iX`.

And the equivalent fold follows:
```
 A << (X - 1) ) | ((A > 0) zext to iX
  -> A < 0 | A > 0
  -> (A != 0) zext to iX
```

It's proved by [[https://alive2.llvm.org/ce/z/33HzjE|alive-tv]]

Related issue:
[[https://github.com/llvm/llvm-project/issues/62586  | (a > b) | (a < b) is not simplified only for the case b=0 ]]

Reviewed By: goldstein.w.n

Differential Revision: https://reviews.llvm.org/D154126

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/and-or-icmps.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c5c3cad9e5d2ed..6d2faff02e7064 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1712,6 +1712,26 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
   assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // ( A << (X - 1) ) | ((A > 0) zext to iX)
+  // <=> A < 0 | A > 0
+  // <=> (A != 0) zext to iX
+  Value *A;
+  ICmpInst::Predicate Pred;
+
+  auto MatchOrZExtICmp = [&](Value *Op0, Value *Op1) -> bool {
+    return match(Op0, m_LShr(m_Value(A), m_SpecificInt(Op0->getType()->getScalarSizeInBits() - 1))) &&
+           match(Op1, m_ZExt(m_ICmp(Pred, m_Specific(A), m_Zero())));
+  };
+
+  if (LogicOpc == Instruction::Or &&
+      (MatchOrZExtICmp(Op0, Op1) || MatchOrZExtICmp(Op1, Op0)) &&
+      Pred == ICmpInst::ICMP_SGT) {
+      Value *Cmp =
+          Builder.CreateICmpNE(A, Constant::getNullValue(A->getType()));
+      return new ZExtInst(Cmp, A->getType());
+  }
+
   CastInst *Cast0 = dyn_cast<CastInst>(Op0);
   if (!Cast0)
     return nullptr;

diff  --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index 0fd54fea25091d..4f5639c2e93d7b 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -2571,10 +2571,8 @@ define <2 x i1> @icmp_ne_m1_or_ne_m1(<2 x i8> %x, <2 x i8> %y) {
 
 define i32 @icmp_slt_0_or_icmp_sgt_0_i32(i32 %x) {
 ; CHECK-LABEL: @icmp_slt_0_or_icmp_sgt_0_i32(
-; CHECK-NEXT:    [[B:%.*]] = icmp sgt i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr i32 [[X]], 31
-; CHECK-NEXT:    [[D:%.*]] = zext i1 [[B]] to i32
-; CHECK-NEXT:    [[E:%.*]] = or i32 [[X_LOBIT]], [[D]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = zext i1 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %A = icmp slt i32 %x, 0
@@ -2587,10 +2585,8 @@ define i32 @icmp_slt_0_or_icmp_sgt_0_i32(i32 %x) {
 
 define i64 @icmp_slt_0_or_icmp_sgt_0_i64(i64 %x) {
 ; CHECK-LABEL: @icmp_slt_0_or_icmp_sgt_0_i64(
-; CHECK-NEXT:    [[B:%.*]] = icmp sgt i64 [[X:%.*]], 0
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr i64 [[X]], 63
-; CHECK-NEXT:    [[D:%.*]] = zext i1 [[B]] to i64
-; CHECK-NEXT:    [[E:%.*]] = or i64 [[X_LOBIT]], [[D]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[X:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = zext i1 [[TMP1]] to i64
 ; CHECK-NEXT:    ret i64 [[E]]
 ;
   %A = icmp slt i64 %x, 0
@@ -2659,10 +2655,8 @@ define i64 @icmp_slt_0_or_icmp_sgt_0_i64_fail3(i64 %x) {
 
 define <2 x i64> @icmp_slt_0_or_icmp_sgt_0_i64x2(<2 x i64> %x) {
 ; CHECK-LABEL: @icmp_slt_0_or_icmp_sgt_0_i64x2(
-; CHECK-NEXT:    [[B:%.*]] = icmp sgt <2 x i64> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr <2 x i64> [[X]], <i64 63, i64 63>
-; CHECK-NEXT:    [[D:%.*]] = zext <2 x i1> [[B]] to <2 x i64>
-; CHECK-NEXT:    [[E:%.*]] = or <2 x i64> [[X_LOBIT]], [[D]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i64> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[E]]
 ;
   %A = icmp slt <2 x i64> %x, <i64 0,i64 0>