[llvm] 8c74205 - [SelectionDAG][DAGCombiner] Reuse existing node by reassociation

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 24 08:15:14 PDT 2022


Author: chenglin.bi
Date: 2022-06-24T23:15:06+08:00
New Revision: 8c7420564264af5913fa8e699e2795d0f2cd48ac

URL: https://github.com/llvm/llvm-project/commit/8c7420564264af5913fa8e699e2795d0f2cd48ac
DIFF: https://github.com/llvm/llvm-project/commit/8c7420564264af5913fa8e699e2795d0f2cd48ac.diff

LOG: [SelectionDAG][DAGCombiner] Reuse existing node by reassociation

When (op N0, N2) already exists, reassociate (op (op N0, N1), N2) to (op (op N0, N2), N1) so that the existing (op N0, N2) node is reused instead of creating a new one.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D122539

Added: 
    llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
    llvm/test/CodeGen/X86/pr56170.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
    llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
    llvm/test/CodeGen/X86/ctpop-combine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 10da6b97dada5..38f440acc1a66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1008,33 +1008,62 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
   // (load/store (add, (add, x, offset1), offset2)) ->
   // (load/store (add, x, offset1+offset2)).
 
-  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
-    return false;
+  // (load/store (add, (add, x, y), offset2)) ->
+  // (load/store (add, (add, x, offset2), y)).
 
-  if (N0.hasOneUse())
+  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
     return false;
 
-  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   auto *C2 = dyn_cast<ConstantSDNode>(N1);
-  if (!C1 || !C2)
+  if (!C2)
     return false;
 
-  const APInt &C1APIntVal = C1->getAPIntValue();
   const APInt &C2APIntVal = C2->getAPIntValue();
-  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+  if (C2APIntVal.getSignificantBits() > 64)
     return false;
 
-  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
-  if (CombinedValueIntVal.getBitWidth() > 64)
-    return false;
-  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
-
-  for (SDNode *Node : N->uses()) {
-    auto LoadStore = dyn_cast<MemSDNode>(Node);
-    if (LoadStore) {
-      // Is x[offset2] already not a legal addressing mode? If so then
-      // reassociating the constants breaks nothing (we test offset2 because
-      // that's the one we hope to fold into the load or store).
+  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+    if (N0.hasOneUse())
+      return false;
+
+    const APInt &C1APIntVal = C1->getAPIntValue();
+    const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+    if (CombinedValueIntVal.getSignificantBits() > 64)
+      return false;
+    const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+    for (SDNode *Node : N->uses()) {
+      if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+        // Is x[offset2] already not a legal addressing mode? If so then
+        // reassociating the constants breaks nothing (we test offset2 because
+        // that's the one we hope to fold into the load or store).
+        TargetLoweringBase::AddrMode AM;
+        AM.HasBaseReg = true;
+        AM.BaseOffs = C2APIntVal.getSExtValue();
+        EVT VT = LoadStore->getMemoryVT();
+        unsigned AS = LoadStore->getAddressSpace();
+        Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+          continue;
+
+        // Would x[offset1+offset2] still be a legal addressing mode?
+        AM.BaseOffs = CombinedValue;
+        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+          return true;
+      }
+    }
+  } else {
+    if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
+      if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+        return false;
+
+    for (SDNode *Node : N->uses()) {
+      auto *LoadStore = dyn_cast<MemSDNode>(Node);
+      if (!LoadStore)
+        return false;
+
+      // Is x[offset2] a legal addressing mode? If so then
+      // reassociating the constants breaks address pattern
       TargetLoweringBase::AddrMode AM;
       AM.HasBaseReg = true;
       AM.BaseOffs = C2APIntVal.getSExtValue();
@@ -1042,13 +1071,9 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
       unsigned AS = LoadStore->getAddressSpace();
       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
-        continue;
-
-      // Would x[offset1+offset2] still be a legal addressing mode?
-      AM.BaseOffs = CombinedValue;
-      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
-        return true;
+        return false;
     }
+    return true;
   }
 
   return false;
@@ -1099,6 +1124,28 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
       return N00;
   }
 
+  if (TLI.isReassocProfitable(DAG, N0, N1)) {
+    if (N1 != N01) {
+      // Reassociate if (op N00, N1) already exist
+      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
+        // if Op (Op N00, N1), N01 already exist
+        // we need to stop reassciate to avoid dead loop
+        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
+          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
+      }
+    }
+
+    if (N1 != N00) {
+      // Reassociate if (op N01, N1) already exist
+      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
+        // if Op (Op N01, N1), N00 already exist
+        // we need to stop reassciate to avoid dead loop
+        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
+          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
+      }
+    }
+  }
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
index 73bde62a671dc..b01412ba2e666 100644
--- a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
@@ -5,17 +5,15 @@
 define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
 ; GCN-LABEL: xor3_i1_const:
 ; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v1, 0x42640000
 ; GCN-NEXT:    s_mov_b32 m0, s1
-; GCN-NEXT:    v_cmp_lt_f32_e64 s[2:3], s0, 0
-; GCN-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
+; GCN-NEXT:    v_mov_b32_e32 v1, 0x42640000
+; GCN-NEXT:    v_cmp_nlt_f32_e64 s[2:3], s0, 0
 ; GCN-NEXT:    v_interp_p2_f32 v0, v0, attr0.x
-; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
+; GCN-NEXT:    v_cmp_nlt_f32_e32 vcc, s0, v1
 ; GCN-NEXT:    v_cmp_gt_f32_e64 s[0:1], 0, v0
-; GCN-NEXT:    s_xor_b64 s[4:5], s[2:3], -1
-; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
-; GCN-NEXT:    s_xor_b64 s[2:3], s[0:1], s[2:3]
-; GCN-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
+; GCN-NEXT:    s_or_b64 s[2:3], s[2:3], vcc
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT:    s_xor_b64 s[2:3], s[2:3], s[0:1]
 ; GCN-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
 ; GCN-NEXT:    ; return to shader part epilog

diff  --git a/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
new file mode 100644
index 0000000000000..9ef9c14084a7d
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define i32 @reassociate_xor(float %x, float %y) {
+; CHECK-LABEL: reassociate_xor:
+; CHECK:         .functype reassociate_xor (f32, f32) -> (i32)
+; CHECK-NEXT:    .local i32
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    i32.const 1
+; CHECK-NEXT:    local.set 2
+; CHECK-NEXT:    block
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32.const 0x1p-23
+; CHECK-NEXT:    f32.le
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f32.const 0x1p-23
+; CHECK-NEXT:    f32.gt
+; CHECK-NEXT:    i32.ne
+; CHECK-NEXT:    br_if 0 # 0: down to label0
+; CHECK-NEXT:  # %bb.1: # %if.then.i
+; CHECK-NEXT:    i32.const 0
+; CHECK-NEXT:    local.set 2
+; CHECK-NEXT:  .LBB0_2: # %if.end.i
+; CHECK-NEXT:    end_block # label0:
+; CHECK-NEXT:    local.get 2
+; CHECK-NEXT:    # fallthrough-return
+entry:                                           ; preds = %if.then, %entry
+  %cmp0 = fcmp ule float %x, 0x3E80000000000000
+  %cmp1 = fcmp ugt float %y, 0x3E80000000000000
+  %cmp2 = xor i1 %cmp0, %cmp1
+  br i1 %cmp2, label %if.end.i, label %if.then.i
+
+if.then.i:                                        ; preds = %if.end
+  br label %if.end.i
+
+if.end.i:                                         ; preds = %if.then.i, %if.end
+  %s = phi i32 [ 1, %entry ], [ 0, %if.then.i ]
+  ret i32 %s
+}

diff  --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 6ce9846efb6aa..5c3712e88cb49 100644
--- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -12,222 +12,222 @@ define fastcc i64 @foo() nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
 ; CHECK-NEXT:    pushq %r12
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movq X(%rip), %r8
-; CHECK-NEXT:    movq X(%rip), %r10
 ; CHECK-NEXT:    movq X(%rip), %r9
-; CHECK-NEXT:    movq X(%rip), %r12
 ; CHECK-NEXT:    movq X(%rip), %r15
+; CHECK-NEXT:    movq X(%rip), %rax
+; CHECK-NEXT:    movq X(%rip), %rdx
+; CHECK-NEXT:    movq X(%rip), %r12
 ; CHECK-NEXT:    movq X(%rip), %r14
 ; CHECK-NEXT:    movq X(%rip), %r11
-; CHECK-NEXT:    movq X(%rip), %rdx
-; CHECK-NEXT:    addq %r15, %rdx
-; CHECK-NEXT:    movq X(%rip), %rsi
-; CHECK-NEXT:    bswapq %rsi
-; CHECK-NEXT:    leaq (%r11,%r14), %rbx
-; CHECK-NEXT:    addq %r15, %rbx
-; CHECK-NEXT:    addq %rdx, %rbx
-; CHECK-NEXT:    addq %rsi, %rbx
-; CHECK-NEXT:    leaq (%r9,%r10), %rdx
-; CHECK-NEXT:    addq %rdx, %rdx
-; CHECK-NEXT:    addq %r8, %rdx
 ; CHECK-NEXT:    movq X(%rip), %rdi
-; CHECK-NEXT:    addq %rbx, %r12
-; CHECK-NEXT:    addq %r8, %rdx
-; CHECK-NEXT:    addq %rbx, %rdx
-; CHECK-NEXT:    bswapq %rdi
-; CHECK-NEXT:    leaq (%r15,%r14), %rsi
-; CHECK-NEXT:    addq %r12, %rsi
-; CHECK-NEXT:    addq %r11, %rdi
-; CHECK-NEXT:    addq %rsi, %rdi
-; CHECK-NEXT:    leaq (%r10,%r8), %rsi
-; CHECK-NEXT:    addq %rsi, %rsi
-; CHECK-NEXT:    addq %rdx, %rsi
-; CHECK-NEXT:    movq X(%rip), %rbx
 ; CHECK-NEXT:    addq %r12, %rdi
-; CHECK-NEXT:    addq %rdi, %r9
-; CHECK-NEXT:    addq %rdx, %rsi
+; CHECK-NEXT:    movq X(%rip), %rcx
+; CHECK-NEXT:    movq X(%rip), %rbx
+; CHECK-NEXT:    bswapq %rcx
+; CHECK-NEXT:    leaq (%r11,%r14), %rsi
+; CHECK-NEXT:    addq %r12, %rsi
 ; CHECK-NEXT:    addq %rdi, %rsi
+; CHECK-NEXT:    addq %rcx, %rsi
+; CHECK-NEXT:    leaq (%r15,%r9), %r8
+; CHECK-NEXT:    leaq (%r8,%rax), %r10
+; CHECK-NEXT:    addq %rsi, %rdx
+; CHECK-NEXT:    addq %r10, %r10
 ; CHECK-NEXT:    bswapq %rbx
-; CHECK-NEXT:    leaq (%r12,%r15), %rdi
-; CHECK-NEXT:    addq %r9, %rdi
-; CHECK-NEXT:    addq %r14, %rbx
-; CHECK-NEXT:    addq %rdi, %rbx
-; CHECK-NEXT:    leaq (%rdx,%r8), %rdi
-; CHECK-NEXT:    addq %rdi, %rdi
-; CHECK-NEXT:    addq %rsi, %rdi
+; CHECK-NEXT:    addq %rsi, %r10
+; CHECK-NEXT:    addq %r11, %rbx
+; CHECK-NEXT:    leaq (%r12,%r14), %rcx
+; CHECK-NEXT:    addq %rdx, %rcx
+; CHECK-NEXT:    addq %rcx, %rbx
+; CHECK-NEXT:    addq %r8, %r8
+; CHECK-NEXT:    addq %r10, %r8
 ; CHECK-NEXT:    movq X(%rip), %rcx
-; CHECK-NEXT:    addq %r9, %rbx
-; CHECK-NEXT:    addq %rbx, %r10
-; CHECK-NEXT:    addq %rsi, %rdi
-; CHECK-NEXT:    addq %rbx, %rdi
+; CHECK-NEXT:    addq %rdx, %rbx
+; CHECK-NEXT:    addq %rbx, %rax
+; CHECK-NEXT:    addq %r10, %r8
+; CHECK-NEXT:    addq %rbx, %r8
 ; CHECK-NEXT:    bswapq %rcx
-; CHECK-NEXT:    leaq (%r9,%r12), %rax
-; CHECK-NEXT:    addq %r10, %rax
-; CHECK-NEXT:    addq %r15, %rcx
+; CHECK-NEXT:    leaq (%rdx,%r12), %rsi
+; CHECK-NEXT:    addq %rax, %rsi
+; CHECK-NEXT:    addq %r14, %rcx
+; CHECK-NEXT:    addq %rsi, %rcx
+; CHECK-NEXT:    leaq (%r10,%r9), %rbx
+; CHECK-NEXT:    addq %rbx, %rbx
+; CHECK-NEXT:    addq %r8, %rbx
+; CHECK-NEXT:    movq X(%rip), %rdi
 ; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    leaq (%rsi,%rdx), %r11
-; CHECK-NEXT:    addq %r11, %r11
-; CHECK-NEXT:    addq %rdi, %r11
-; CHECK-NEXT:    movq X(%rip), %rbx
-; CHECK-NEXT:    addq %r10, %rcx
-; CHECK-NEXT:    addq %rcx, %r8
-; CHECK-NEXT:    addq %rdi, %r11
-; CHECK-NEXT:    addq %rcx, %r11
-; CHECK-NEXT:    bswapq %rbx
-; CHECK-NEXT:    leaq (%r10,%r9), %rcx
-; CHECK-NEXT:    addq %r8, %rcx
-; CHECK-NEXT:    addq %r12, %rbx
+; CHECK-NEXT:    addq %rcx, %r15
+; CHECK-NEXT:    addq %r8, %rbx
 ; CHECK-NEXT:    addq %rcx, %rbx
-; CHECK-NEXT:    leaq (%rdi,%rsi), %r14
+; CHECK-NEXT:    bswapq %rdi
+; CHECK-NEXT:    leaq (%rax,%rdx), %rcx
+; CHECK-NEXT:    addq %r15, %rcx
+; CHECK-NEXT:    addq %r12, %rdi
+; CHECK-NEXT:    addq %rcx, %rdi
+; CHECK-NEXT:    leaq (%r8,%r10), %r12
+; CHECK-NEXT:    addq %r12, %r12
+; CHECK-NEXT:    addq %rbx, %r12
+; CHECK-NEXT:    movq X(%rip), %rcx
+; CHECK-NEXT:    addq %r15, %rdi
+; CHECK-NEXT:    addq %rdi, %r9
+; CHECK-NEXT:    addq %rbx, %r12
+; CHECK-NEXT:    addq %rdi, %r12
+; CHECK-NEXT:    bswapq %rcx
+; CHECK-NEXT:    leaq (%r15,%rax), %rdi
+; CHECK-NEXT:    addq %r9, %rdi
+; CHECK-NEXT:    addq %rdx, %rcx
+; CHECK-NEXT:    addq %rdi, %rcx
+; CHECK-NEXT:    leaq (%rbx,%r8), %r13
+; CHECK-NEXT:    addq %r13, %r13
+; CHECK-NEXT:    addq %r12, %r13
+; CHECK-NEXT:    movq X(%rip), %rdx
+; CHECK-NEXT:    addq %r9, %rcx
+; CHECK-NEXT:    addq %rcx, %r10
+; CHECK-NEXT:    addq %r12, %r13
+; CHECK-NEXT:    addq %rcx, %r13
+; CHECK-NEXT:    bswapq %rdx
+; CHECK-NEXT:    leaq (%r9,%r15), %rcx
+; CHECK-NEXT:    addq %r10, %rcx
+; CHECK-NEXT:    addq %rax, %rdx
+; CHECK-NEXT:    addq %rcx, %rdx
+; CHECK-NEXT:    leaq (%r12,%rbx), %r14
 ; CHECK-NEXT:    addq %r14, %r14
-; CHECK-NEXT:    addq %r11, %r14
+; CHECK-NEXT:    addq %r13, %r14
 ; CHECK-NEXT:    movq X(%rip), %rax
-; CHECK-NEXT:    addq %r8, %rbx
-; CHECK-NEXT:    addq %rbx, %rdx
-; CHECK-NEXT:    addq %r11, %r14
-; CHECK-NEXT:    addq %rbx, %r14
+; CHECK-NEXT:    addq %r10, %rdx
+; CHECK-NEXT:    addq %rdx, %r8
+; CHECK-NEXT:    addq %r13, %r14
+; CHECK-NEXT:    addq %rdx, %r14
 ; CHECK-NEXT:    bswapq %rax
-; CHECK-NEXT:    leaq (%r8,%r10), %rbx
-; CHECK-NEXT:    addq %rdx, %rbx
-; CHECK-NEXT:    addq %r9, %rax
+; CHECK-NEXT:    leaq (%r10,%r9), %rcx
+; CHECK-NEXT:    addq %r8, %rcx
+; CHECK-NEXT:    addq %r15, %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    leaq (%r13,%r12), %r11
+; CHECK-NEXT:    addq %r11, %r11
+; CHECK-NEXT:    addq %r14, %r11
+; CHECK-NEXT:    movq X(%rip), %rcx
+; CHECK-NEXT:    addq %r8, %rax
+; CHECK-NEXT:    addq %rax, %rbx
+; CHECK-NEXT:    addq %r14, %r11
+; CHECK-NEXT:    addq %rax, %r11
+; CHECK-NEXT:    bswapq %rcx
+; CHECK-NEXT:    leaq (%r8,%r10), %rax
 ; CHECK-NEXT:    addq %rbx, %rax
-; CHECK-NEXT:    leaq (%r11,%rdi), %r9
+; CHECK-NEXT:    addq %r9, %rcx
+; CHECK-NEXT:    addq %rax, %rcx
+; CHECK-NEXT:    leaq (%r14,%r13), %r9
 ; CHECK-NEXT:    addq %r9, %r9
-; CHECK-NEXT:    addq %r14, %r9
-; CHECK-NEXT:    movq X(%rip), %rbx
-; CHECK-NEXT:    addq %rdx, %rax
-; CHECK-NEXT:    addq %rax, %rsi
-; CHECK-NEXT:    addq %r14, %r9
-; CHECK-NEXT:    addq %rax, %r9
-; CHECK-NEXT:    bswapq %rbx
-; CHECK-NEXT:    leaq (%rdx,%r8), %rax
-; CHECK-NEXT:    addq %rsi, %rax
-; CHECK-NEXT:    addq %r10, %rbx
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    leaq (%r14,%r11), %r10
+; CHECK-NEXT:    addq %r11, %r9
+; CHECK-NEXT:    movq X(%rip), %rax
+; CHECK-NEXT:    addq %rbx, %rcx
+; CHECK-NEXT:    addq %rcx, %r12
+; CHECK-NEXT:    addq %r11, %r9
+; CHECK-NEXT:    addq %rcx, %r9
+; CHECK-NEXT:    bswapq %rax
+; CHECK-NEXT:    leaq (%rbx,%r8), %rcx
+; CHECK-NEXT:    addq %r12, %rcx
+; CHECK-NEXT:    addq %r10, %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    leaq (%r11,%r14), %r10
 ; CHECK-NEXT:    addq %r10, %r10
 ; CHECK-NEXT:    addq %r9, %r10
-; CHECK-NEXT:    movq X(%rip), %rax
-; CHECK-NEXT:    addq %rsi, %rbx
-; CHECK-NEXT:    addq %rbx, %rdi
+; CHECK-NEXT:    movq X(%rip), %rsi
+; CHECK-NEXT:    addq %r12, %rax
+; CHECK-NEXT:    addq %rax, %r13
 ; CHECK-NEXT:    addq %r9, %r10
-; CHECK-NEXT:    addq %rbx, %r10
+; CHECK-NEXT:    addq %rax, %r10
+; CHECK-NEXT:    bswapq %rsi
+; CHECK-NEXT:    leaq (%r12,%rbx), %rax
+; CHECK-NEXT:    addq %r13, %rax
+; CHECK-NEXT:    addq %r8, %rsi
+; CHECK-NEXT:    addq %rax, %rsi
+; CHECK-NEXT:    leaq (%r9,%r11), %rdx
+; CHECK-NEXT:    addq %rdx, %rdx
+; CHECK-NEXT:    addq %r10, %rdx
+; CHECK-NEXT:    movq X(%rip), %rax
+; CHECK-NEXT:    addq %r13, %rsi
+; CHECK-NEXT:    addq %rsi, %r14
+; CHECK-NEXT:    addq %r10, %rdx
+; CHECK-NEXT:    addq %rsi, %rdx
 ; CHECK-NEXT:    bswapq %rax
-; CHECK-NEXT:    leaq (%rsi,%rdx), %rbx
-; CHECK-NEXT:    addq %rdi, %rbx
-; CHECK-NEXT:    addq %r8, %rax
+; CHECK-NEXT:    leaq (%r13,%r12), %rsi
+; CHECK-NEXT:    addq %r14, %rsi
 ; CHECK-NEXT:    addq %rbx, %rax
-; CHECK-NEXT:    leaq (%r9,%r14), %r8
+; CHECK-NEXT:    addq %rsi, %rax
+; CHECK-NEXT:    leaq (%r10,%r9), %r8
 ; CHECK-NEXT:    addq %r8, %r8
-; CHECK-NEXT:    addq %r10, %r8
-; CHECK-NEXT:    movq X(%rip), %rbx
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %r8
+; CHECK-NEXT:    movq X(%rip), %rsi
+; CHECK-NEXT:    addq %r14, %rax
 ; CHECK-NEXT:    addq %rax, %r11
-; CHECK-NEXT:    addq %r10, %r8
+; CHECK-NEXT:    addq %rdx, %r8
 ; CHECK-NEXT:    addq %rax, %r8
-; CHECK-NEXT:    bswapq %rbx
-; CHECK-NEXT:    leaq (%rdi,%rsi), %rax
+; CHECK-NEXT:    bswapq %rsi
+; CHECK-NEXT:    leaq (%r14,%r13), %rax
 ; CHECK-NEXT:    addq %r11, %rax
-; CHECK-NEXT:    addq %rdx, %rbx
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    leaq (%r10,%r9), %r15
-; CHECK-NEXT:    addq %r15, %r15
-; CHECK-NEXT:    addq %r8, %r15
-; CHECK-NEXT:    movq X(%rip), %rax
-; CHECK-NEXT:    addq %r11, %rbx
-; CHECK-NEXT:    addq %rbx, %r14
-; CHECK-NEXT:    addq %r8, %r15
-; CHECK-NEXT:    addq %rbx, %r15
-; CHECK-NEXT:    bswapq %rax
-; CHECK-NEXT:    leaq (%r11,%rdi), %rbx
-; CHECK-NEXT:    addq %r14, %rbx
+; CHECK-NEXT:    addq %r12, %rsi
+; CHECK-NEXT:    addq %rax, %rsi
+; CHECK-NEXT:    leaq (%rdx,%r10), %rax
+; CHECK-NEXT:    addq %rax, %rax
+; CHECK-NEXT:    addq %r8, %rax
+; CHECK-NEXT:    movq X(%rip), %rdi
+; CHECK-NEXT:    addq %r11, %rsi
+; CHECK-NEXT:    addq %rsi, %r9
+; CHECK-NEXT:    addq %r8, %rax
 ; CHECK-NEXT:    addq %rsi, %rax
-; CHECK-NEXT:    addq %rbx, %rax
-; CHECK-NEXT:    leaq (%r8,%r10), %rsi
+; CHECK-NEXT:    bswapq %rdi
+; CHECK-NEXT:    leaq (%r11,%r14), %rsi
+; CHECK-NEXT:    addq %r9, %rsi
+; CHECK-NEXT:    addq %r13, %rdi
+; CHECK-NEXT:    addq %rsi, %rdi
+; CHECK-NEXT:    leaq (%r8,%rdx), %rsi
 ; CHECK-NEXT:    addq %rsi, %rsi
-; CHECK-NEXT:    addq %r15, %rsi
-; CHECK-NEXT:    movq X(%rip), %rbx
-; CHECK-NEXT:    addq %r14, %rax
-; CHECK-NEXT:    addq %rax, %r9
-; CHECK-NEXT:    addq %r15, %rsi
 ; CHECK-NEXT:    addq %rax, %rsi
-; CHECK-NEXT:    bswapq %rbx
-; CHECK-NEXT:    leaq (%r14,%r11), %rax
-; CHECK-NEXT:    addq %r9, %rax
-; CHECK-NEXT:    addq %rdi, %rbx
-; CHECK-NEXT:    addq %rax, %rbx
-; CHECK-NEXT:    leaq (%r15,%r8), %r12
-; CHECK-NEXT:    addq %r12, %r12
-; CHECK-NEXT:    addq %rsi, %r12
 ; CHECK-NEXT:    movq X(%rip), %rcx
-; CHECK-NEXT:    addq %r9, %rbx
-; CHECK-NEXT:    addq %rbx, %r10
-; CHECK-NEXT:    addq %rsi, %r12
-; CHECK-NEXT:    addq %rbx, %r12
+; CHECK-NEXT:    addq %r9, %rdi
+; CHECK-NEXT:    addq %rdi, %r10
+; CHECK-NEXT:    addq %rax, %rsi
+; CHECK-NEXT:    addq %rdi, %rsi
 ; CHECK-NEXT:    bswapq %rcx
-; CHECK-NEXT:    leaq (%r9,%r14), %rax
-; CHECK-NEXT:    addq %r10, %rax
-; CHECK-NEXT:    addq %r11, %rcx
-; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    leaq (%rsi,%r15), %rax
-; CHECK-NEXT:    addq %rax, %rax
-; CHECK-NEXT:    addq %r12, %rax
-; CHECK-NEXT:    movq X(%rip), %rbx
+; CHECK-NEXT:    leaq (%r9,%r11), %rdi
+; CHECK-NEXT:    addq %r10, %rdi
+; CHECK-NEXT:    addq %r14, %rcx
+; CHECK-NEXT:    addq %rdi, %rcx
+; CHECK-NEXT:    leaq (%rax,%r8), %rdi
+; CHECK-NEXT:    addq %rdi, %rdi
+; CHECK-NEXT:    addq %rsi, %rdi
 ; CHECK-NEXT:    addq %r10, %rcx
-; CHECK-NEXT:    addq %rcx, %r8
-; CHECK-NEXT:    addq %r12, %rax
-; CHECK-NEXT:    addq %rcx, %rax
-; CHECK-NEXT:    bswapq %rbx
-; CHECK-NEXT:    leaq (%r10,%r9), %rcx
-; CHECK-NEXT:    addq %r8, %rcx
-; CHECK-NEXT:    addq %r14, %rbx
-; CHECK-NEXT:    addq %rcx, %rbx
-; CHECK-NEXT:    leaq (%r12,%rsi), %rcx
-; CHECK-NEXT:    addq %rcx, %rcx
-; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    movq X(%rip), %rdx
-; CHECK-NEXT:    addq %r8, %rbx
-; CHECK-NEXT:    addq %rbx, %r15
-; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    addq %rbx, %rcx
-; CHECK-NEXT:    bswapq %rdx
-; CHECK-NEXT:    leaq (%r8,%r10), %rbx
-; CHECK-NEXT:    addq %r15, %rbx
-; CHECK-NEXT:    addq %r9, %rdx
-; CHECK-NEXT:    addq %rbx, %rdx
-; CHECK-NEXT:    leaq (%rax,%r12), %rbx
-; CHECK-NEXT:    addq %rbx, %rbx
-; CHECK-NEXT:    addq %rcx, %rbx
-; CHECK-NEXT:    addq %r15, %rdx
-; CHECK-NEXT:    addq %rdx, %rsi
-; CHECK-NEXT:    addq %rcx, %rbx
-; CHECK-NEXT:    addq %rdx, %rbx
-; CHECK-NEXT:    movq X(%rip), %rdx
-; CHECK-NEXT:    bswapq %rdx
-; CHECK-NEXT:    addq %r10, %rdx
-; CHECK-NEXT:    leaq (%r15,%r8), %rdi
+; CHECK-NEXT:    addq %rcx, %rdx
 ; CHECK-NEXT:    addq %rsi, %rdi
-; CHECK-NEXT:    addq %rdi, %rdx
-; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    addq %rcx, %rcx
-; CHECK-NEXT:    addq %rbx, %rcx
+; CHECK-NEXT:    addq %rcx, %rdi
+; CHECK-NEXT:    movq X(%rip), %rcx
+; CHECK-NEXT:    bswapq %rcx
+; CHECK-NEXT:    addq %r11, %rcx
+; CHECK-NEXT:    leaq (%r10,%r9), %rbx
+; CHECK-NEXT:    addq %rdx, %rbx
 ; CHECK-NEXT:    addq %rbx, %rcx
-; CHECK-NEXT:    addq %rsi, %rdx
-; CHECK-NEXT:    addq %rdx, %r12
+; CHECK-NEXT:    addq %rax, %rsi
+; CHECK-NEXT:    addq %rsi, %rsi
+; CHECK-NEXT:    addq %rdi, %rsi
+; CHECK-NEXT:    addq %rdi, %rsi
 ; CHECK-NEXT:    addq %rdx, %rcx
+; CHECK-NEXT:    addq %rcx, %r8
+; CHECK-NEXT:    addq %rcx, %rsi
 ; CHECK-NEXT:    movq X(%rip), %rax
 ; CHECK-NEXT:    bswapq %rax
-; CHECK-NEXT:    addq %r15, %rsi
+; CHECK-NEXT:    addq %r10, %rdx
 ; CHECK-NEXT:    movq %rax, X(%rip)
+; CHECK-NEXT:    addq %r9, %rax
+; CHECK-NEXT:    addq %r8, %rdx
+; CHECK-NEXT:    addq %rdx, %rax
 ; CHECK-NEXT:    addq %r8, %rax
-; CHECK-NEXT:    addq %r12, %rsi
 ; CHECK-NEXT:    addq %rsi, %rax
-; CHECK-NEXT:    addq %r12, %rax
-; CHECK-NEXT:    addq %rcx, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index 0070b1a4ac922..086deb5d89eb2 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -162,33 +162,29 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
 define i1 @ctpop_trunc_non_power2(i255 %x) nounwind {
 ; CHECK-LABEL: ctpop_trunc_non_power2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movabsq $9223372036854775807, %r8 # imm = 0x7FFFFFFFFFFFFFFF
 ; CHECK-NEXT:    movq %rcx, %r9
 ; CHECK-NEXT:    andq %r8, %r9
-; CHECK-NEXT:    movq %rdi, %r11
-; CHECK-NEXT:    addq $-1, %r11
-; CHECK-NEXT:    movq %rsi, %r10
-; CHECK-NEXT:    adcq $-1, %r10
-; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    movq %rdi, %r10
+; CHECK-NEXT:    addq $-1, %r10
+; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    adcq $-1, %rax
-; CHECK-NEXT:    movq %rcx, %rbx
-; CHECK-NEXT:    adcq %r8, %rbx
-; CHECK-NEXT:    andq %rdi, %r11
-; CHECK-NEXT:    andq %rdx, %rax
+; CHECK-NEXT:    movq %rdx, %r11
+; CHECK-NEXT:    adcq $-1, %r11
+; CHECK-NEXT:    adcq %r8, %rcx
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    andq %rdx, %r11
+; CHECK-NEXT:    orq %r10, %r11
+; CHECK-NEXT:    andq %r9, %rcx
+; CHECK-NEXT:    andq %rsi, %rax
+; CHECK-NEXT:    orq %rcx, %rax
 ; CHECK-NEXT:    orq %r11, %rax
-; CHECK-NEXT:    andq %rsi, %r10
-; CHECK-NEXT:    andq %r8, %rbx
-; CHECK-NEXT:    andq %rcx, %rbx
-; CHECK-NEXT:    orq %r10, %rbx
-; CHECK-NEXT:    orq %rax, %rbx
 ; CHECK-NEXT:    sete %cl
 ; CHECK-NEXT:    orq %rdx, %rdi
 ; CHECK-NEXT:    orq %rsi, %r9
 ; CHECK-NEXT:    orq %rdi, %r9
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    andb %cl, %al
-; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
   %a = call i255 @llvm.ctpop.i255(i255 %x)
   %b = trunc i255 %a to i8 ; largest value from ctpop is 255, fits in 8 bits.

diff  --git a/llvm/test/CodeGen/X86/pr56170.ll b/llvm/test/CodeGen/X86/pr56170.ll
new file mode 100644
index 0000000000000..dfd9391cdd54c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr56170.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-generic < %s | FileCheck %s
+
+define void @reassociation_gt64bit(i32 %x, i32 %y, ptr %s) {
+; CHECK-LABEL: reassociation_gt64bit:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %ecx
+; CHECK-NEXT:    addq %rax, %rcx
+; CHECK-NEXT:    movq %rcx, (%rdx)
+; CHECK-NEXT:    movw $64, 8(%rdx)
+; CHECK-NEXT:    retq
+  %zextx = zext i32 %x to i80
+  %zexty = zext i32 %y to i80
+  %add1 = add i80 %zextx, 1180591620717411303424
+  %add2 = add i80 %add1, %zexty
+  store i80 %add2, ptr %s
+  ret void
+}


        


More information about the llvm-commits mailing list