[llvm] 310adb6 - [InstCombine] reorder mask folds for efficiency

Sanjay Patel via llvm-commits <llvm-commits at lists.llvm.org>
Mon Jun 13 06:57:19 PDT 2022


Author: Sanjay Patel
Date: 2022-06-13T09:49:57-04:00
New Revision: 310adb658c8c23b988dbdfb77e269133ba51ea2f

URL: https://github.com/llvm/llvm-project/commit/310adb658c8c23b988dbdfb77e269133ba51ea2f
DIFF: https://github.com/llvm/llvm-project/commit/310adb658c8c23b988dbdfb77e269133ba51ea2f.diff

LOG: [InstCombine] reorder mask folds for efficiency

This shows narrowing improvements in the logic tests
(transforms recently added with e247b0e5c921).

This is not a complete fix; that would require adding
folds to visitOr/visitXor. But it enables the expected
transforms for the basic patterns in the affected tests.
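
Previously, the mask-sinking fold ran before the narrowing folds in
visitAnd, so it consumed the pattern before the narrow form could be
created. As a minimal sketch of the affected shape (illustrative
function name; the real tests are in and.ll below):

    define i32 @narrow_example(i8 %x, i32 %y) {
      %zx = zext i8 %x to i32
      %bo = xor i32 %y, %zx
      %r = and i32 %bo, 255   ; low-mask matches the zext source width
      ret i32 %r
    }

With the reordering, instcombine reaches the narrow logic op directly:

    %y.tr = trunc i32 %y to i8
    %bo.narrow = xor i8 %y.tr, %x
    %r = zext i8 %bo.narrow to i32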

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/and.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c26ea4c0b779..16435bd3cf93 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1796,25 +1796,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
       return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together));
     }
 
-    // If the mask is only needed on one incoming arm, push the 'and' op up.
-    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
-        match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
-      APInt NotAndMask(~(*C));
-      BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode();
-      if (MaskedValueIsZero(X, NotAndMask, 0, &I)) {
-        // Not masking anything out for the LHS, move mask to RHS.
-        // and ({x}or X, Y), C --> {x}or X, (and Y, C)
-        Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked");
-        return BinaryOperator::Create(BinOp, X, NewRHS);
-      }
-      if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) {
-        // Not masking anything out for the RHS, move mask to LHS.
-        // and ({x}or X, Y), C --> {x}or (and X, C), Y
-        Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked");
-        return BinaryOperator::Create(BinOp, NewLHS, Y);
-      }
-    }
-
     unsigned Width = Ty->getScalarSizeInBits();
     const APInt *ShiftC;
     if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) {
@@ -1912,6 +1893,27 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
       }
     }
 
+    // This is intentionally placed after the narrowing transforms for
+    // efficiency (transform directly to the narrow logic op if possible).
+    // If the mask is only needed on one incoming arm, push the 'and' op up.
+    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
+        match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+      APInt NotAndMask(~(*C));
+      BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode();
+      if (MaskedValueIsZero(X, NotAndMask, 0, &I)) {
+        // Not masking anything out for the LHS, move mask to RHS.
+        // and ({x}or X, Y), C --> {x}or X, (and Y, C)
+        Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked");
+        return BinaryOperator::Create(BinOp, X, NewRHS);
+      }
+      if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) {
+        // Not masking anything out for the RHS, move mask to LHS.
+        // and ({x}or X, Y), C --> {x}or (and X, C), Y
+        Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked");
+        return BinaryOperator::Create(BinOp, NewLHS, Y);
+      }
+    }
+
     Constant *C1, *C2;
     const APInt *C3 = C;
     Value *X;
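
The relocated fold itself is unchanged: when one arm of the or/xor has
no bits set where the mask clears (proved via MaskedValueIsZero), the
'and' sinks onto the other arm. A concrete sketch (illustrative values,
not one of this commit's tests):

    %x = and i32 %p, 255   ; high 24 bits of %x are known zero
    %o = or i32 %x, %y
    %r = and i32 %o, 255
    -->
    %y.masked = and i32 %y, 255
    %r = or i32 %x, %y.masked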

diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index 05f3d3bf708e..eed332bf3759 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -862,14 +862,12 @@ define i32 @lowmask_mul_zext(i8 %x, i32 %y) {
   ret i32 %r
 }
 
-; TODO: we could have narrowed the xor
-
 define i32 @lowmask_xor_zext_commute(i8 %x, i32 %p) {
 ; CHECK-LABEL: @lowmask_xor_zext_commute(
 ; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[ZX:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[Y_MASKED:%.*]] = and i32 [[Y]], 255
-; CHECK-NEXT:    [[R:%.*]] = xor i32 [[Y_MASKED]], [[ZX]]
+; CHECK-NEXT:    [[Y_TR:%.*]] = trunc i32 [[Y]] to i8
+; CHECK-NEXT:    [[BO_NARROW:%.*]] = xor i8 [[Y_TR]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[BO_NARROW]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %y = mul i32 %p, %p ; thwart complexity-based canonicalization
@@ -879,13 +877,11 @@ define i32 @lowmask_xor_zext_commute(i8 %x, i32 %p) {
   ret i32 %r
 }
 
-; TODO: we could have narrowed the or
-
 define i24 @lowmask_or_zext_commute(i16 %x, i24 %y) {
 ; CHECK-LABEL: @lowmask_or_zext_commute(
-; CHECK-NEXT:    [[ZX:%.*]] = zext i16 [[X:%.*]] to i24
-; CHECK-NEXT:    [[Y_MASKED:%.*]] = and i24 [[Y:%.*]], 65535
-; CHECK-NEXT:    [[R:%.*]] = or i24 [[Y_MASKED]], [[ZX]]
+; CHECK-NEXT:    [[Y_TR:%.*]] = trunc i24 [[Y:%.*]] to i16
+; CHECK-NEXT:    [[BO_NARROW:%.*]] = or i16 [[Y_TR]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = zext i16 [[BO_NARROW]] to i24
 ; CHECK-NEXT:    ret i24 [[R]]
 ;
   %zx = zext i16 %x to i24
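
For context, the diff elides most of the test bodies. Reconstructed from
the CHECK lines, the input for the second test is approximately:

    define i24 @lowmask_or_zext_commute(i16 %x, i24 %y) {
      %zx = zext i16 %x to i24
      %bo = or i24 %y, %zx
      %r = and i24 %bo, 65535
      ret i24 %r
    }

Before this commit, the mask-sinking fold fired first and left a wide
'and' plus a wide 'or'; with the reordering, the narrowing fold produces
the trunc/or/zext sequence shown in the updated CHECK lines instead.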

