[llvm] 8c74205 - [SelectionDAG][DAGCombiner] Reuse exist node by reassociate
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 24 08:15:14 PDT 2022
Author: chenglin.bi
Date: 2022-06-24T23:15:06+08:00
New Revision: 8c7420564264af5913fa8e699e2795d0f2cd48ac
URL: https://github.com/llvm/llvm-project/commit/8c7420564264af5913fa8e699e2795d0f2cd48ac
DIFF: https://github.com/llvm/llvm-project/commit/8c7420564264af5913fa8e699e2795d0f2cd48ac.diff
LOG: [SelectionDAG][DAGCombiner] Reuse exist node by reassociate
When (op N0, N2) already exists, reassociate (op (op N0, N1), N2) to (op (op N0, N2), N1) to reuse the existing (op N0, N2)
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D122539
Added:
llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
llvm/test/CodeGen/X86/pr56170.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
llvm/test/CodeGen/X86/ctpop-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 10da6b97dada5..38f440acc1a66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1008,33 +1008,62 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).
- if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
- return false;
+ // (load/store (add, (add, x, y), offset2)) ->
+ // (load/store (add, (add, x, offset2), y)).
- if (N0.hasOneUse())
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
- auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
- if (!C1 || !C2)
+ if (!C2)
return false;
- const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
- if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+ if (C2APIntVal.getSignificantBits() > 64)
return false;
- const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
- if (CombinedValueIntVal.getBitWidth() > 64)
- return false;
- const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
-
- for (SDNode *Node : N->uses()) {
- auto LoadStore = dyn_cast<MemSDNode>(Node);
- if (LoadStore) {
- // Is x[offset2] already not a legal addressing mode? If so then
- // reassociating the constants breaks nothing (we test offset2 because
- // that's the one we hope to fold into the load or store).
+ if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (N0.hasOneUse())
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getSignificantBits() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N->uses()) {
+ if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
+ }
+ }
+ } else {
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
+ if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+ return false;
+
+ for (SDNode *Node : N->uses()) {
+ auto *LoadStore = dyn_cast<MemSDNode>(Node);
+ if (!LoadStore)
+ return false;
+
+      // Is x[offset2] a legal addressing mode? If so, then
+      // reassociating the constants would break the addressing-mode pattern.
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
@@ -1042,13 +1071,9 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
- continue;
-
- // Would x[offset1+offset2] still be a legal addressing mode?
- AM.BaseOffs = CombinedValue;
- if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
- return true;
+ return false;
}
+ return true;
}
return false;
@@ -1099,6 +1124,28 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return N00;
}
+ if (TLI.isReassocProfitable(DAG, N0, N1)) {
+ if (N1 != N01) {
+      // Reassociate if (op N00, N1) already exists
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
+        // If Op (Op N00, N1), N01 already exists,
+        // we need to stop reassociating to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
+ }
+ }
+
+ if (N1 != N00) {
+      // Reassociate if (op N01, N1) already exists
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
+        // If Op (Op N01, N1), N00 already exists,
+        // we need to stop reassociating to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
+ }
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
index 73bde62a671dc..b01412ba2e666 100644
--- a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
@@ -5,17 +5,15 @@
define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
; GCN-LABEL: xor3_i1_const:
; GCN: ; %bb.0: ; %main_body
-; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
; GCN-NEXT: s_mov_b32 m0, s1
-; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0
-; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
+; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
+; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0
; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x
-; GCN-NEXT: s_and_b64 s[2:3], s[2:3], vcc
+; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1
; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
-; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], -1
-; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
-; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
-; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], -1
+; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc
+; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1]
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; GCN-NEXT: ; return to shader part epilog
diff --git a/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
new file mode 100644
index 0000000000000..9ef9c14084a7d
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define i32 @reassociate_xor(float %x, float %y) {
+; CHECK-LABEL: reassociate_xor:
+; CHECK: .functype reassociate_xor (f32, f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: i32.const 1
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: block
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32.const 0x1p-23
+; CHECK-NEXT: f32.le
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.const 0x1p-23
+; CHECK-NEXT: f32.gt
+; CHECK-NEXT: i32.ne
+; CHECK-NEXT: br_if 0 # 0: down to label0
+; CHECK-NEXT: # %bb.1: # %if.then.i
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: .LBB0_2: # %if.end.i
+; CHECK-NEXT: end_block # label0:
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: # fallthrough-return
+entry: ; preds = %if.then, %entry
+ %cmp0 = fcmp ule float %x, 0x3E80000000000000
+ %cmp1 = fcmp ugt float %y, 0x3E80000000000000
+ %cmp2 = xor i1 %cmp0, %cmp1
+ br i1 %cmp2, label %if.end.i, label %if.then.i
+
+if.then.i: ; preds = %if.end
+ br label %if.end.i
+
+if.end.i: ; preds = %if.then.i, %if.end
+ %s = phi i32 [ 1, %entry ], [ 0, %if.then.i ]
+ ret i32 %s
+}
diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 6ce9846efb6aa..5c3712e88cb49 100644
--- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -12,222 +12,222 @@ define fastcc i64 @foo() nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movq X(%rip), %r8
-; CHECK-NEXT: movq X(%rip), %r10
; CHECK-NEXT: movq X(%rip), %r9
-; CHECK-NEXT: movq X(%rip), %r12
; CHECK-NEXT: movq X(%rip), %r15
+; CHECK-NEXT: movq X(%rip), %rax
+; CHECK-NEXT: movq X(%rip), %rdx
+; CHECK-NEXT: movq X(%rip), %r12
; CHECK-NEXT: movq X(%rip), %r14
; CHECK-NEXT: movq X(%rip), %r11
-; CHECK-NEXT: movq X(%rip), %rdx
-; CHECK-NEXT: addq %r15, %rdx
-; CHECK-NEXT: movq X(%rip), %rsi
-; CHECK-NEXT: bswapq %rsi
-; CHECK-NEXT: leaq (%r11,%r14), %rbx
-; CHECK-NEXT: addq %r15, %rbx
-; CHECK-NEXT: addq %rdx, %rbx
-; CHECK-NEXT: addq %rsi, %rbx
-; CHECK-NEXT: leaq (%r9,%r10), %rdx
-; CHECK-NEXT: addq %rdx, %rdx
-; CHECK-NEXT: addq %r8, %rdx
; CHECK-NEXT: movq X(%rip), %rdi
-; CHECK-NEXT: addq %rbx, %r12
-; CHECK-NEXT: addq %r8, %rdx
-; CHECK-NEXT: addq %rbx, %rdx
-; CHECK-NEXT: bswapq %rdi
-; CHECK-NEXT: leaq (%r15,%r14), %rsi
-; CHECK-NEXT: addq %r12, %rsi
-; CHECK-NEXT: addq %r11, %rdi
-; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: leaq (%r10,%r8), %rsi
-; CHECK-NEXT: addq %rsi, %rsi
-; CHECK-NEXT: addq %rdx, %rsi
-; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: addq %r12, %rdi
-; CHECK-NEXT: addq %rdi, %r9
-; CHECK-NEXT: addq %rdx, %rsi
+; CHECK-NEXT: movq X(%rip), %rcx
+; CHECK-NEXT: movq X(%rip), %rbx
+; CHECK-NEXT: bswapq %rcx
+; CHECK-NEXT: leaq (%r11,%r14), %rsi
+; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %rdi, %rsi
+; CHECK-NEXT: addq %rcx, %rsi
+; CHECK-NEXT: leaq (%r15,%r9), %r8
+; CHECK-NEXT: leaq (%r8,%rax), %r10
+; CHECK-NEXT: addq %rsi, %rdx
+; CHECK-NEXT: addq %r10, %r10
; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%r12,%r15), %rdi
-; CHECK-NEXT: addq %r9, %rdi
-; CHECK-NEXT: addq %r14, %rbx
-; CHECK-NEXT: addq %rdi, %rbx
-; CHECK-NEXT: leaq (%rdx,%r8), %rdi
-; CHECK-NEXT: addq %rdi, %rdi
-; CHECK-NEXT: addq %rsi, %rdi
+; CHECK-NEXT: addq %rsi, %r10
+; CHECK-NEXT: addq %r11, %rbx
+; CHECK-NEXT: leaq (%r12,%r14), %rcx
+; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: addq %rcx, %rbx
+; CHECK-NEXT: addq %r8, %r8
+; CHECK-NEXT: addq %r10, %r8
; CHECK-NEXT: movq X(%rip), %rcx
-; CHECK-NEXT: addq %r9, %rbx
-; CHECK-NEXT: addq %rbx, %r10
-; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: addq %rbx, %rdi
+; CHECK-NEXT: addq %rdx, %rbx
+; CHECK-NEXT: addq %rbx, %rax
+; CHECK-NEXT: addq %r10, %r8
+; CHECK-NEXT: addq %rbx, %r8
; CHECK-NEXT: bswapq %rcx
-; CHECK-NEXT: leaq (%r9,%r12), %rax
-; CHECK-NEXT: addq %r10, %rax
-; CHECK-NEXT: addq %r15, %rcx
+; CHECK-NEXT: leaq (%rdx,%r12), %rsi
+; CHECK-NEXT: addq %rax, %rsi
+; CHECK-NEXT: addq %r14, %rcx
+; CHECK-NEXT: addq %rsi, %rcx
+; CHECK-NEXT: leaq (%r10,%r9), %rbx
+; CHECK-NEXT: addq %rbx, %rbx
+; CHECK-NEXT: addq %r8, %rbx
+; CHECK-NEXT: movq X(%rip), %rdi
; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: leaq (%rsi,%rdx), %r11
-; CHECK-NEXT: addq %r11, %r11
-; CHECK-NEXT: addq %rdi, %r11
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %r10, %rcx
-; CHECK-NEXT: addq %rcx, %r8
-; CHECK-NEXT: addq %rdi, %r11
-; CHECK-NEXT: addq %rcx, %r11
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%r10,%r9), %rcx
-; CHECK-NEXT: addq %r8, %rcx
-; CHECK-NEXT: addq %r12, %rbx
+; CHECK-NEXT: addq %rcx, %r15
+; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rcx, %rbx
-; CHECK-NEXT: leaq (%rdi,%rsi), %r14
+; CHECK-NEXT: bswapq %rdi
+; CHECK-NEXT: leaq (%rax,%rdx), %rcx
+; CHECK-NEXT: addq %r15, %rcx
+; CHECK-NEXT: addq %r12, %rdi
+; CHECK-NEXT: addq %rcx, %rdi
+; CHECK-NEXT: leaq (%r8,%r10), %r12
+; CHECK-NEXT: addq %r12, %r12
+; CHECK-NEXT: addq %rbx, %r12
+; CHECK-NEXT: movq X(%rip), %rcx
+; CHECK-NEXT: addq %r15, %rdi
+; CHECK-NEXT: addq %rdi, %r9
+; CHECK-NEXT: addq %rbx, %r12
+; CHECK-NEXT: addq %rdi, %r12
+; CHECK-NEXT: bswapq %rcx
+; CHECK-NEXT: leaq (%r15,%rax), %rdi
+; CHECK-NEXT: addq %r9, %rdi
+; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: addq %rdi, %rcx
+; CHECK-NEXT: leaq (%rbx,%r8), %r13
+; CHECK-NEXT: addq %r13, %r13
+; CHECK-NEXT: addq %r12, %r13
+; CHECK-NEXT: movq X(%rip), %rdx
+; CHECK-NEXT: addq %r9, %rcx
+; CHECK-NEXT: addq %rcx, %r10
+; CHECK-NEXT: addq %r12, %r13
+; CHECK-NEXT: addq %rcx, %r13
+; CHECK-NEXT: bswapq %rdx
+; CHECK-NEXT: leaq (%r9,%r15), %rcx
+; CHECK-NEXT: addq %r10, %rcx
+; CHECK-NEXT: addq %rax, %rdx
+; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: leaq (%r12,%rbx), %r14
; CHECK-NEXT: addq %r14, %r14
-; CHECK-NEXT: addq %r11, %r14
+; CHECK-NEXT: addq %r13, %r14
; CHECK-NEXT: movq X(%rip), %rax
-; CHECK-NEXT: addq %r8, %rbx
-; CHECK-NEXT: addq %rbx, %rdx
-; CHECK-NEXT: addq %r11, %r14
-; CHECK-NEXT: addq %rbx, %r14
+; CHECK-NEXT: addq %r10, %rdx
+; CHECK-NEXT: addq %rdx, %r8
+; CHECK-NEXT: addq %r13, %r14
+; CHECK-NEXT: addq %rdx, %r14
; CHECK-NEXT: bswapq %rax
-; CHECK-NEXT: leaq (%r8,%r10), %rbx
-; CHECK-NEXT: addq %rdx, %rbx
-; CHECK-NEXT: addq %r9, %rax
+; CHECK-NEXT: leaq (%r10,%r9), %rcx
+; CHECK-NEXT: addq %r8, %rcx
+; CHECK-NEXT: addq %r15, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: leaq (%r13,%r12), %r11
+; CHECK-NEXT: addq %r11, %r11
+; CHECK-NEXT: addq %r14, %r11
+; CHECK-NEXT: movq X(%rip), %rcx
+; CHECK-NEXT: addq %r8, %rax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: addq %r14, %r11
+; CHECK-NEXT: addq %rax, %r11
+; CHECK-NEXT: bswapq %rcx
+; CHECK-NEXT: leaq (%r8,%r10), %rax
; CHECK-NEXT: addq %rbx, %rax
-; CHECK-NEXT: leaq (%r11,%rdi), %r9
+; CHECK-NEXT: addq %r9, %rcx
+; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: leaq (%r14,%r13), %r9
; CHECK-NEXT: addq %r9, %r9
-; CHECK-NEXT: addq %r14, %r9
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: addq %rax, %rsi
-; CHECK-NEXT: addq %r14, %r9
-; CHECK-NEXT: addq %rax, %r9
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%rdx,%r8), %rax
-; CHECK-NEXT: addq %rsi, %rax
-; CHECK-NEXT: addq %r10, %rbx
-; CHECK-NEXT: addq %rax, %rbx
-; CHECK-NEXT: leaq (%r14,%r11), %r10
+; CHECK-NEXT: addq %r11, %r9
+; CHECK-NEXT: movq X(%rip), %rax
+; CHECK-NEXT: addq %rbx, %rcx
+; CHECK-NEXT: addq %rcx, %r12
+; CHECK-NEXT: addq %r11, %r9
+; CHECK-NEXT: addq %rcx, %r9
+; CHECK-NEXT: bswapq %rax
+; CHECK-NEXT: leaq (%rbx,%r8), %rcx
+; CHECK-NEXT: addq %r12, %rcx
+; CHECK-NEXT: addq %r10, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: leaq (%r11,%r14), %r10
; CHECK-NEXT: addq %r10, %r10
; CHECK-NEXT: addq %r9, %r10
-; CHECK-NEXT: movq X(%rip), %rax
-; CHECK-NEXT: addq %rsi, %rbx
-; CHECK-NEXT: addq %rbx, %rdi
+; CHECK-NEXT: movq X(%rip), %rsi
+; CHECK-NEXT: addq %r12, %rax
+; CHECK-NEXT: addq %rax, %r13
; CHECK-NEXT: addq %r9, %r10
-; CHECK-NEXT: addq %rbx, %r10
+; CHECK-NEXT: addq %rax, %r10
+; CHECK-NEXT: bswapq %rsi
+; CHECK-NEXT: leaq (%r12,%rbx), %rax
+; CHECK-NEXT: addq %r13, %rax
+; CHECK-NEXT: addq %r8, %rsi
+; CHECK-NEXT: addq %rax, %rsi
+; CHECK-NEXT: leaq (%r9,%r11), %rdx
+; CHECK-NEXT: addq %rdx, %rdx
+; CHECK-NEXT: addq %r10, %rdx
+; CHECK-NEXT: movq X(%rip), %rax
+; CHECK-NEXT: addq %r13, %rsi
+; CHECK-NEXT: addq %rsi, %r14
+; CHECK-NEXT: addq %r10, %rdx
+; CHECK-NEXT: addq %rsi, %rdx
; CHECK-NEXT: bswapq %rax
-; CHECK-NEXT: leaq (%rsi,%rdx), %rbx
-; CHECK-NEXT: addq %rdi, %rbx
-; CHECK-NEXT: addq %r8, %rax
+; CHECK-NEXT: leaq (%r13,%r12), %rsi
+; CHECK-NEXT: addq %r14, %rsi
; CHECK-NEXT: addq %rbx, %rax
-; CHECK-NEXT: leaq (%r9,%r14), %r8
+; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: leaq (%r10,%r9), %r8
; CHECK-NEXT: addq %r8, %r8
-; CHECK-NEXT: addq %r10, %r8
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: addq %rdx, %r8
+; CHECK-NEXT: movq X(%rip), %rsi
+; CHECK-NEXT: addq %r14, %rax
; CHECK-NEXT: addq %rax, %r11
-; CHECK-NEXT: addq %r10, %r8
+; CHECK-NEXT: addq %rdx, %r8
; CHECK-NEXT: addq %rax, %r8
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%rdi,%rsi), %rax
+; CHECK-NEXT: bswapq %rsi
+; CHECK-NEXT: leaq (%r14,%r13), %rax
; CHECK-NEXT: addq %r11, %rax
-; CHECK-NEXT: addq %rdx, %rbx
-; CHECK-NEXT: addq %rax, %rbx
-; CHECK-NEXT: leaq (%r10,%r9), %r15
-; CHECK-NEXT: addq %r15, %r15
-; CHECK-NEXT: addq %r8, %r15
-; CHECK-NEXT: movq X(%rip), %rax
-; CHECK-NEXT: addq %r11, %rbx
-; CHECK-NEXT: addq %rbx, %r14
-; CHECK-NEXT: addq %r8, %r15
-; CHECK-NEXT: addq %rbx, %r15
-; CHECK-NEXT: bswapq %rax
-; CHECK-NEXT: leaq (%r11,%rdi), %rbx
-; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: addq %r12, %rsi
+; CHECK-NEXT: addq %rax, %rsi
+; CHECK-NEXT: leaq (%rdx,%r10), %rax
+; CHECK-NEXT: addq %rax, %rax
+; CHECK-NEXT: addq %r8, %rax
+; CHECK-NEXT: movq X(%rip), %rdi
+; CHECK-NEXT: addq %r11, %rsi
+; CHECK-NEXT: addq %rsi, %r9
+; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %rsi, %rax
-; CHECK-NEXT: addq %rbx, %rax
-; CHECK-NEXT: leaq (%r8,%r10), %rsi
+; CHECK-NEXT: bswapq %rdi
+; CHECK-NEXT: leaq (%r11,%r14), %rsi
+; CHECK-NEXT: addq %r9, %rsi
+; CHECK-NEXT: addq %r13, %rdi
+; CHECK-NEXT: addq %rsi, %rdi
+; CHECK-NEXT: leaq (%r8,%rdx), %rsi
; CHECK-NEXT: addq %rsi, %rsi
-; CHECK-NEXT: addq %r15, %rsi
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %r14, %rax
-; CHECK-NEXT: addq %rax, %r9
-; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: addq %rax, %rsi
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%r14,%r11), %rax
-; CHECK-NEXT: addq %r9, %rax
-; CHECK-NEXT: addq %rdi, %rbx
-; CHECK-NEXT: addq %rax, %rbx
-; CHECK-NEXT: leaq (%r15,%r8), %r12
-; CHECK-NEXT: addq %r12, %r12
-; CHECK-NEXT: addq %rsi, %r12
; CHECK-NEXT: movq X(%rip), %rcx
-; CHECK-NEXT: addq %r9, %rbx
-; CHECK-NEXT: addq %rbx, %r10
-; CHECK-NEXT: addq %rsi, %r12
-; CHECK-NEXT: addq %rbx, %r12
+; CHECK-NEXT: addq %r9, %rdi
+; CHECK-NEXT: addq %rdi, %r10
+; CHECK-NEXT: addq %rax, %rsi
+; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: bswapq %rcx
-; CHECK-NEXT: leaq (%r9,%r14), %rax
-; CHECK-NEXT: addq %r10, %rax
-; CHECK-NEXT: addq %r11, %rcx
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: leaq (%rsi,%r15), %rax
-; CHECK-NEXT: addq %rax, %rax
-; CHECK-NEXT: addq %r12, %rax
-; CHECK-NEXT: movq X(%rip), %rbx
+; CHECK-NEXT: leaq (%r9,%r11), %rdi
+; CHECK-NEXT: addq %r10, %rdi
+; CHECK-NEXT: addq %r14, %rcx
+; CHECK-NEXT: addq %rdi, %rcx
+; CHECK-NEXT: leaq (%rax,%r8), %rdi
+; CHECK-NEXT: addq %rdi, %rdi
+; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: addq %r10, %rcx
-; CHECK-NEXT: addq %rcx, %r8
-; CHECK-NEXT: addq %r12, %rax
-; CHECK-NEXT: addq %rcx, %rax
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%r10,%r9), %rcx
-; CHECK-NEXT: addq %r8, %rcx
-; CHECK-NEXT: addq %r14, %rbx
-; CHECK-NEXT: addq %rcx, %rbx
-; CHECK-NEXT: leaq (%r12,%rsi), %rcx
-; CHECK-NEXT: addq %rcx, %rcx
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: movq X(%rip), %rdx
-; CHECK-NEXT: addq %r8, %rbx
-; CHECK-NEXT: addq %rbx, %r15
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: addq %rbx, %rcx
-; CHECK-NEXT: bswapq %rdx
-; CHECK-NEXT: leaq (%r8,%r10), %rbx
-; CHECK-NEXT: addq %r15, %rbx
-; CHECK-NEXT: addq %r9, %rdx
-; CHECK-NEXT: addq %rbx, %rdx
-; CHECK-NEXT: leaq (%rax,%r12), %rbx
-; CHECK-NEXT: addq %rbx, %rbx
-; CHECK-NEXT: addq %rcx, %rbx
-; CHECK-NEXT: addq %r15, %rdx
-; CHECK-NEXT: addq %rdx, %rsi
-; CHECK-NEXT: addq %rcx, %rbx
-; CHECK-NEXT: addq %rdx, %rbx
-; CHECK-NEXT: movq X(%rip), %rdx
-; CHECK-NEXT: bswapq %rdx
-; CHECK-NEXT: addq %r10, %rdx
-; CHECK-NEXT: leaq (%r15,%r8), %rdi
+; CHECK-NEXT: addq %rcx, %rdx
; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: addq %rdi, %rdx
-; CHECK-NEXT: addq %rax, %rcx
-; CHECK-NEXT: addq %rcx, %rcx
-; CHECK-NEXT: addq %rbx, %rcx
+; CHECK-NEXT: addq %rcx, %rdi
+; CHECK-NEXT: movq X(%rip), %rcx
+; CHECK-NEXT: bswapq %rcx
+; CHECK-NEXT: addq %r11, %rcx
+; CHECK-NEXT: leaq (%r10,%r9), %rbx
+; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rbx, %rcx
-; CHECK-NEXT: addq %rsi, %rdx
-; CHECK-NEXT: addq %rdx, %r12
+; CHECK-NEXT: addq %rax, %rsi
+; CHECK-NEXT: addq %rsi, %rsi
+; CHECK-NEXT: addq %rdi, %rsi
+; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: addq %rcx, %r8
+; CHECK-NEXT: addq %rcx, %rsi
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: bswapq %rax
-; CHECK-NEXT: addq %r15, %rsi
+; CHECK-NEXT: addq %r10, %rdx
; CHECK-NEXT: movq %rax, X(%rip)
+; CHECK-NEXT: addq %r9, %rax
+; CHECK-NEXT: addq %r8, %rdx
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %r8, %rax
-; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %rsi, %rax
-; CHECK-NEXT: addq %r12, %rax
-; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index 0070b1a4ac922..086deb5d89eb2 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -162,33 +162,29 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
define i1 @ctpop_trunc_non_power2(i255 %x) nounwind {
; CHECK-LABEL: ctpop_trunc_non_power2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movabsq $9223372036854775807, %r8 # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: movq %rcx, %r9
; CHECK-NEXT: andq %r8, %r9
-; CHECK-NEXT: movq %rdi, %r11
-; CHECK-NEXT: addq $-1, %r11
-; CHECK-NEXT: movq %rsi, %r10
-; CHECK-NEXT: adcq $-1, %r10
-; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: movq %rdi, %r10
+; CHECK-NEXT: addq $-1, %r10
+; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: adcq $-1, %rax
-; CHECK-NEXT: movq %rcx, %rbx
-; CHECK-NEXT: adcq %r8, %rbx
-; CHECK-NEXT: andq %rdi, %r11
-; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: movq %rdx, %r11
+; CHECK-NEXT: adcq $-1, %r11
+; CHECK-NEXT: adcq %r8, %rcx
+; CHECK-NEXT: andq %rdi, %r10
+; CHECK-NEXT: andq %rdx, %r11
+; CHECK-NEXT: orq %r10, %r11
+; CHECK-NEXT: andq %r9, %rcx
+; CHECK-NEXT: andq %rsi, %rax
+; CHECK-NEXT: orq %rcx, %rax
; CHECK-NEXT: orq %r11, %rax
-; CHECK-NEXT: andq %rsi, %r10
-; CHECK-NEXT: andq %r8, %rbx
-; CHECK-NEXT: andq %rcx, %rbx
-; CHECK-NEXT: orq %r10, %rbx
-; CHECK-NEXT: orq %rax, %rbx
; CHECK-NEXT: sete %cl
; CHECK-NEXT: orq %rdx, %rdi
; CHECK-NEXT: orq %rsi, %r9
; CHECK-NEXT: orq %rdi, %r9
; CHECK-NEXT: setne %al
; CHECK-NEXT: andb %cl, %al
-; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%a = call i255 @llvm.ctpop.i255(i255 %x)
%b = trunc i255 %a to i8 ; largest value from ctpop is 255, fits in 8 bits.
diff --git a/llvm/test/CodeGen/X86/pr56170.ll b/llvm/test/CodeGen/X86/pr56170.ll
new file mode 100644
index 0000000000000..dfd9391cdd54c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr56170.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-generic < %s | FileCheck %s
+
+define void @reassociation_gt64bit(i32 %x, i32 %y, ptr %s) {
+; CHECK-LABEL: reassociation_gt64bit:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: movq %rcx, (%rdx)
+; CHECK-NEXT: movw $64, 8(%rdx)
+; CHECK-NEXT: retq
+ %zextx = zext i32 %x to i80
+ %zexty = zext i32 %y to i80
+ %add1 = add i80 %zextx, 1180591620717411303424
+ %add2 = add i80 %add1, %zexty
+ store i80 %add2, ptr %s
+ ret void
+}
More information about the llvm-commits
mailing list