[llvm] 8dab0a4 - [DAGCombine][X86][AArch64] 'A - (A & (B - 1))' -> 'A & (0 - B)' fold (PR44448)

Fri Jan 3 02:59:50 PST 2020

Author: Roman Lebedev
Date: 2020-01-03T13:58:36+03:00
New Revision: 8dab0a4a7d691f2704f1079538e0ef29548db159

URL: https://github.com/llvm/llvm-project/commit/8dab0a4a7d691f2704f1079538e0ef29548db159
DIFF: https://github.com/llvm/llvm-project/commit/8dab0a4a7d691f2704f1079538e0ef29548db159.diff

LOG: [DAGCombine][X86][AArch64] 'A - (A & (B - 1))' -> 'A & (0 - B)' fold (PR44448)

While we do manage to fold integer-typed IR in the middle-end,
we can't do that for the main motivating case of pointers.

There is the @llvm.ptrmask() intrinsic, which may or may not be helpful,
but I'm not sure it is considered fully canonical yet;
likely not everything is fully aware of it.

https://rise4fun.com/Alive/ZVdp

Name: ptr - (ptr & (alignment-1))  ->  ptr & (0 - alignment)
  %mask = add i64 %alignment, -1
  %bias = and i64 %ptr, %mask
  %r = sub i64 %ptr, %bias
=>
  %highbitmask = sub i64 0, %alignment
  %r = and i64 %ptr, %highbitmask

See
  https://bugs.llvm.org/show_bug.cgi?id=44448
  https://reviews.llvm.org/D71499

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/align-down.ll
    llvm/test/CodeGen/X86/align-down.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1415b1e37d15..5dbe29f5fbdb 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3104,6 +3104,21 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                    N1.getOperand(0)));
 
+  // A - (A & (B - 1))  ->  A & (0 - B)
+  if (N1.getOpcode() == ISD::AND && N1.hasOneUse()) {
+    SDValue A = N1.getOperand(0);
+    SDValue BDec = N1.getOperand(1);
+    if (A != N0)
+      std::swap(A, BDec);
+    if (A == N0 && BDec.getOpcode() == ISD::ADD &&
+        isAllOnesOrAllOnesSplat(BDec->getOperand(1))) {
+      SDValue B = BDec.getOperand(0);
+      SDValue NegB =
+          DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), B);
+      return DAG.getNode(ISD::AND, DL, VT, A, NegB);
+    }
+  }
+
   // fold (X - (-Y * Z)) -> (X + (Y * Z))
   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
     if (N1.getOperand(0).getOpcode() == ISD::SUB &&

diff  --git a/llvm/test/CodeGen/AArch64/align-down.ll b/llvm/test/CodeGen/AArch64/align-down.ll
index 23ff194908cb..4ad4d115157f 100644
--- a/llvm/test/CodeGen/AArch64/align-down.ll
+++ b/llvm/test/CodeGen/AArch64/align-down.ll
@@ -17,9 +17,8 @@
 define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
 ; CHECK-LABEL: t0_32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, #1 // =1
-; CHECK-NEXT:    and w8, w0, w8
-; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    neg w8, w1
+; CHECK-NEXT:    and w0, w0, w8
 ; CHECK-NEXT:    ret
   %mask = add i32 %alignment, -1
   %bias = and i32 %ptr, %mask
@@ -29,9 +28,8 @@ define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
 define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
 ; CHECK-LABEL: t1_64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub x8, x1, #1 // =1
-; CHECK-NEXT:    and x8, x0, x8
-; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    neg x8, x1
+; CHECK-NEXT:    and x0, x0, x8
 ; CHECK-NEXT:    ret
   %mask = add i64 %alignment, -1
   %bias = and i64 %ptr, %mask
@@ -42,9 +40,8 @@ define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
 define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
 ; CHECK-LABEL: t2_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, #1 // =1
-; CHECK-NEXT:    and w8, w8, w0
-; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    neg w8, w1
+; CHECK-NEXT:    and w0, w0, w8
 ; CHECK-NEXT:    ret
   %mask = add i32 %alignment, -1
   %bias = and i32 %mask, %ptr ; swapped
@@ -57,9 +54,9 @@ define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
 define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
 ; CHECK-LABEL: t3_extrause0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w9, w1
 ; CHECK-NEXT:    sub w8, w1, #1 // =1
-; CHECK-NEXT:    and w9, w0, w8
-; CHECK-NEXT:    sub w0, w0, w9
+; CHECK-NEXT:    and w0, w0, w9
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:    ret
   %mask = add i32 %alignment, -1

diff  --git a/llvm/test/CodeGen/X86/align-down.ll b/llvm/test/CodeGen/X86/align-down.ll
index 261740fee5f8..b546324f95e7 100644
--- a/llvm/test/CodeGen/X86/align-down.ll
+++ b/llvm/test/CodeGen/X86/align-down.ll
@@ -18,19 +18,16 @@
 define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
 ; X86-LABEL: t0_32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    decl %ecx
-; X86-NEXT:    andl %eax, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t0_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    decl %esi
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %mask = add i32 %alignment, -1
   %bias = and i32 %ptr, %mask
@@ -40,26 +37,19 @@ define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
 define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
 ; X86-LABEL: t1_64:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    addl $-1, %ecx
-; X86-NEXT:    adcl $-1, %esi
-; X86-NEXT:    andl %edx, %esi
-; X86-NEXT:    andl %eax, %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    sbbl %esi, %edx
-; X86-NEXT:    popl %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t1_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    decq %rsi
-; X64-NEXT:    andq %rdi, %rsi
-; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    andq %rdi, %rax
 ; X64-NEXT:    retq
   %mask = add i64 %alignment, -1
   %bias = and i64 %ptr, %mask
@@ -70,19 +60,16 @@ define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
 define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
 ; X86-LABEL: t2_commutative:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    decl %ecx
-; X86-NEXT:    andl %eax, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t2_commutative:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    decl %esi
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %mask = add i32 %alignment, -1
   %bias = and i32 %mask, %ptr ; swapped
@@ -95,22 +82,22 @@ define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
 define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
 ; X86-LABEL: t3_extrause0:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    decl %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal -1(%eax), %edx
 ; X86-NEXT:    movl %edx, (%ecx)
-; X86-NEXT:    andl %eax, %edx
-; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t3_extrause0:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    decl %esi
-; X64-NEXT:    movl %esi, (%rdx)
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    leal -1(%rax), %ecx
+; X64-NEXT:    movl %ecx, (%rdx)
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andl %edi, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %mask = add i32 %alignment, -1
   store i32 %mask, i32* %mask_storage