[llvm] [MachineLICM] Let targets decide if copy-like instructions are cheap (PR #146599)
Guy David via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 08:27:14 PDT 2025
https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/146599
From 01e724e06be4d47d897cf6d3c96f92b741c951c2 Mon Sep 17 00:00:00 2001
From: Guy David <guyda96 at gmail.com>
Date: Tue, 1 Jul 2025 14:29:54 +0300
Subject: [PATCH] [MachineLICM] Let targets decide if copy-like instructions
are cheap
When checking whether it is profitable to hoist an instruction, the pass
may override the target's ruling because it assumes that all COPY-like
instructions are cheap, which may not hold on every micro-architecture.
On AArch64 there is no measurable performance difference in LLVM's
test-suite. Additionally, very few tests were affected by this change,
which suggests the blanket assumption rarely mattered in practice.
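For context, MachineInstr::isCopyLike() matches both plain COPY and
SUBREG_TO_REG instructions; with this patch only SUBREG_TO_REG remains
unconditionally cheap, while plain copies defer to the target's
isAsCheapAsAMove() hook. As a minimal illustrative sketch (the
MyTargetInstrInfo class below is hypothetical, not part of this patch),
a target could now mark copies as expensive:

  #include "llvm/CodeGen/TargetInstrInfo.h"

  using namespace llvm;

  // Let MachineLICM treat COPY as expensive on a micro-architecture where
  // register copies are not free: after this patch, IsCheapInstruction()
  // no longer short-circuits the decision for plain copies, so this
  // override is actually consulted.
  bool MyTargetInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
    if (MI.getOpcode() == TargetOpcode::COPY)
      return false;
    // Fall back to the generic behavior for everything else.
    return TargetInstrInfo::isAsCheapAsAMove(MI);
  }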
---
llvm/lib/CodeGen/MachineLICM.cpp | 2 +-
llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll | 258 ++++++++++---------
llvm/test/CodeGen/X86/memfold-mov32r0.ll | 9 -
llvm/test/CodeGen/X86/memfold-mov32r0.mir | 143 ++++++++++
llvm/test/CodeGen/X86/pr57673.ll | 36 +--
5 files changed, 301 insertions(+), 147 deletions(-)
delete mode 100644 llvm/test/CodeGen/X86/memfold-mov32r0.ll
create mode 100644 llvm/test/CodeGen/X86/memfold-mov32r0.mir
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 699d7ab175568..f1811c47e5ad4 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -1219,7 +1219,7 @@ bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
/// Return true if the instruction is marked "cheap" or the operand latency
/// between its def and a use is one or less.
bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
- if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
+ if (TII->isAsCheapAsAMove(MI) || MI.isSubregToReg())
return true;
bool isCheap = false;
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
index 9cb2d4444b974..3c3320463f249 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll
@@ -1,17 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
;; Tests that the ppc-vsx-fma-mutate pass with the schedule-ppc-vsx-fma-mutation-early pass does not hoist xxspltiw out of loops.
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
-; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefixes=CHECK64,AIX64 %s
+; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefixes=AIX64 %s
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
-; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK64,LINUX64 %s
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefixes=LINUX64 %s
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
; RUN: -mtriple powerpc-ibm-aix < %s | FileCheck --check-prefix=CHECK32 %s
define void @bar(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
+; AIX64-LABEL: bar:
+; AIX64: # %bb.0: # %entry
+; AIX64-NEXT: lwz r5, 0(r5)
+; AIX64-NEXT: cmpwi r5, 1
+; AIX64-NEXT: bltlr cr0
+; AIX64-NEXT: # %bb.1: # %for.body.preheader
+; AIX64-NEXT: xxspltiw vs0, 1069066811
+; AIX64-NEXT: xxspltiw vs1, 1170469888
+; AIX64-NEXT: mtctr r5
+; AIX64-NEXT: li r5, 0
+; AIX64-NEXT: .align 5
+; AIX64-NEXT: L..BB0_2: # %for.body
+; AIX64-NEXT: #
+; AIX64-NEXT: lxvx vs2, r4, r5
+; AIX64-NEXT: xvmaddmsp vs2, vs0, vs1
+; AIX64-NEXT: stxvx vs2, r3, r5
+; AIX64-NEXT: addi r5, r5, 16
+; AIX64-NEXT: bdnz L..BB0_2
+; AIX64-NEXT: # %bb.3: # %for.end
+; AIX64-NEXT: blr
+;
+; LINUX64-LABEL: bar:
+; LINUX64: # %bb.0: # %entry
+; LINUX64-NEXT: lwz r5, 0(r5)
+; LINUX64-NEXT: cmpwi r5, 1
+; LINUX64-NEXT: bltlr cr0
+; LINUX64-NEXT: # %bb.1: # %for.body.preheader
+; LINUX64-NEXT: xxspltiw vs0, 1069066811
+; LINUX64-NEXT: xxspltiw vs1, 1170469888
+; LINUX64-NEXT: mtctr r5
+; LINUX64-NEXT: li r5, 0
+; LINUX64-NEXT: .p2align 5
+; LINUX64-NEXT: .LBB0_2: # %for.body
+; LINUX64-NEXT: #
+; LINUX64-NEXT: lxvx vs2, r4, r5
+; LINUX64-NEXT: xvmaddmsp vs2, vs0, vs1
+; LINUX64-NEXT: stxvx vs2, r3, r5
+; LINUX64-NEXT: addi r5, r5, 16
+; LINUX64-NEXT: bdnz .LBB0_2
+; LINUX64-NEXT: # %bb.3: # %for.end
+; LINUX64-NEXT: blr
+;
+; CHECK32-LABEL: bar:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: lwz r5, 0(r5)
+; CHECK32-NEXT: cmpwi r5, 0
+; CHECK32-NEXT: blelr cr0
+; CHECK32-NEXT: # %bb.1: # %for.body.preheader
+; CHECK32-NEXT: xxspltiw vs0, 1069066811
+; CHECK32-NEXT: xxspltiw vs1, 1170469888
+; CHECK32-NEXT: li r6, 0
+; CHECK32-NEXT: li r7, 0
+; CHECK32-NEXT: .align 4
+; CHECK32-NEXT: L..BB0_2: # %for.body
+; CHECK32-NEXT: #
+; CHECK32-NEXT: slwi r8, r7, 4
+; CHECK32-NEXT: addic r7, r7, 1
+; CHECK32-NEXT: addze r6, r6
+; CHECK32-NEXT: lxvx vs2, r4, r8
+; CHECK32-NEXT: xvmaddmsp vs2, vs0, vs1
+; CHECK32-NEXT: stxvx vs2, r3, r8
+; CHECK32-NEXT: xor r8, r7, r5
+; CHECK32-NEXT: or. r8, r8, r6
+; CHECK32-NEXT: bne cr0, L..BB0_2
+; CHECK32-NEXT: # %bb.3: # %for.end
+; CHECK32-NEXT: blr
entry:
%0 = load i32, ptr %n, align 4
%cmp11 = icmp sgt i32 %0, 0
@@ -28,7 +95,7 @@ for.body:
%add.ptr.val = load <4 x float>, ptr %add.ptr, align 1
%2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>)
%add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1
- store <4 x float> %2, ptr %add.ptr6, align 1
+ store <4 x float> %2, ptr %add.ptr6, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.end, label %for.body
@@ -38,6 +105,74 @@ for.end:
}
define void @foo(i1 %cmp97) #0 {
+; AIX64-LABEL: foo:
+; AIX64: # %bb.0: # %entry
+; AIX64-NEXT: andi. r3, r3, 1
+; AIX64-NEXT: bclr 4, gt, 0
+; AIX64-NEXT: # %bb.1: # %for.body.preheader
+; AIX64-NEXT: xxlxor f0, f0, f0
+; AIX64-NEXT: xxlxor vs1, vs1, vs1
+; AIX64-NEXT: xxlxor f2, f2, f2
+; AIX64-NEXT: .align 4
+; AIX64-NEXT: L..BB1_2: # %for.body
+; AIX64-NEXT: #
+; AIX64-NEXT: xxmrghd vs2, vs2, vs0
+; AIX64-NEXT: xvcvdpsp vs34, vs2
+; AIX64-NEXT: xxmrghd vs2, vs0, vs0
+; AIX64-NEXT: xvcvdpsp vs35, vs2
+; AIX64-NEXT: xxspltiw vs2, 1170469888
+; AIX64-NEXT: vmrgew v2, v2, v3
+; AIX64-NEXT: xvcmpgtsp vs3, vs1, vs34
+; AIX64-NEXT: xvmaddasp vs2, vs34, vs1
+; AIX64-NEXT: xxland vs2, vs3, vs2
+; AIX64-NEXT: xscvspdpn f2, vs2
+; AIX64-NEXT: b L..BB1_2
+;
+; LINUX64-LABEL: foo:
+; LINUX64: # %bb.0: # %entry
+; LINUX64-NEXT: andi. r3, r3, 1
+; LINUX64-NEXT: bclr 4, gt, 0
+; LINUX64-NEXT: # %bb.1: # %for.body.preheader
+; LINUX64-NEXT: xxlxor f0, f0, f0
+; LINUX64-NEXT: xxlxor vs1, vs1, vs1
+; LINUX64-NEXT: xxlxor f2, f2, f2
+; LINUX64-NEXT: .p2align 4
+; LINUX64-NEXT: .LBB1_2: # %for.body
+; LINUX64-NEXT: #
+; LINUX64-NEXT: xxmrghd vs2, vs0, vs2
+; LINUX64-NEXT: xvcvdpsp vs34, vs2
+; LINUX64-NEXT: xxspltd vs2, vs0, 0
+; LINUX64-NEXT: xvcvdpsp vs35, vs2
+; LINUX64-NEXT: xxspltiw vs2, 1170469888
+; LINUX64-NEXT: vmrgew v2, v3, v2
+; LINUX64-NEXT: xvcmpgtsp vs3, vs1, vs34
+; LINUX64-NEXT: xvmaddasp vs2, vs34, vs1
+; LINUX64-NEXT: xxland vs2, vs3, vs2
+; LINUX64-NEXT: xxsldwi vs2, vs2, vs2, 3
+; LINUX64-NEXT: xscvspdpn f2, vs2
+; LINUX64-NEXT: b .LBB1_2
+;
+; CHECK32-LABEL: foo:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: andi. r3, r3, 1
+; CHECK32-NEXT: bclr 4, gt, 0
+; CHECK32-NEXT: # %bb.1: # %for.body.preheader
+; CHECK32-NEXT: lwz r3, L..C0(r2) # %const.0
+; CHECK32-NEXT: xxlxor f1, f1, f1
+; CHECK32-NEXT: xxlxor vs0, vs0, vs0
+; CHECK32-NEXT: xscvdpspn vs35, f1
+; CHECK32-NEXT: lxv vs34, 0(r3)
+; CHECK32-NEXT: .align 4
+; CHECK32-NEXT: L..BB1_2: # %for.body
+; CHECK32-NEXT: #
+; CHECK32-NEXT: xscvdpspn vs36, f1
+; CHECK32-NEXT: xxspltiw vs1, 1170469888
+; CHECK32-NEXT: vperm v4, v4, v3, v2
+; CHECK32-NEXT: xvcmpgtsp vs2, vs0, vs36
+; CHECK32-NEXT: xvmaddasp vs1, vs36, vs0
+; CHECK32-NEXT: xxland vs1, vs2, vs1
+; CHECK32-NEXT: xscvspdpn f1, vs1
+; CHECK32-NEXT: b L..BB1_2
entry:
br i1 %cmp97, label %for.body, label %for.end
@@ -57,122 +192,7 @@ for.end: ; preds = %entry
}
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float>, <4 x float>)
-
-; CHECK64: bar:
-; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: lwz r5, 0(r5)
-; CHECK64-NEXT: cmpwi r5, 1
-; CHECK64-NEXT: bltlr cr0
-; CHECK64-NEXT: # %bb.1: # %for.body.preheader
-; CHECK64-NEXT: xxspltiw vs0, 1069066811
-; CHECK64-NEXT: xxspltiw vs1, 1170469888
-; CHECK64-NEXT: mtctr r5
-; CHECK64-NEXT: li r5, 0
-; CHECK64-NEXT: {{.*}}align 5
-; CHECK64-NEXT: [[L2_bar:.*]]: # %for.body
-; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK64-NEXT: lxvx vs2, r4, r5
-; CHECK64-NEXT: xvmaddmsp vs2, vs0, vs1
-; CHECK64-NEXT: stxvx vs2, r3, r5
-; CHECK64-NEXT: addi r5, r5, 16
-; CHECK64-NEXT: bdnz [[L2_bar]]
-; CHECK64-NEXT: # %bb.3: # %for.end
-; CHECK64-NEXT: blr
-
-; AIX64: .foo:
-; AIX64-NEXT: # %bb.0: # %entry
-; AIX64-NEXT: andi. r3, r3, 1
-; AIX64-NEXT: bclr 4, gt, 0
-; AIX64-NEXT: # %bb.1: # %for.body.preheader
-; AIX64-NEXT: xxlxor f0, f0, f0
-; AIX64-NEXT: xxlxor vs1, vs1, vs1
-; AIX64-NEXT: xxlxor f2, f2, f2
-; AIX64-NEXT: .align 4
-; AIX64-NEXT: L..BB1_2: # %for.body
-; AIX64-NEXT: # =>This Inner Loop Header: Depth=1
-; AIX64-NEXT: xxmrghd vs2, vs2, vs0
-; AIX64-NEXT: xvcvdpsp vs34, vs2
-; AIX64-NEXT: xxmrghd vs2, vs0, vs0
-; AIX64-NEXT: xvcvdpsp vs35, vs2
-; AIX64-NEXT: xxspltiw vs2, 1170469888
-; AIX64-NEXT: vmrgew v2, v2, v3
-; AIX64-NEXT: xvcmpgtsp vs3, vs1, vs34
-; AIX64-NEXT: xvmaddasp vs2, vs34, vs1
-; AIX64-NEXT: xxland vs2, vs3, vs2
-; AIX64-NEXT: xscvspdpn f2, vs2
-; AIX64-NEXT: b L..BB1_2
-
-; LINUX64: foo: # @foo
-; LINUX64-NEXT: .Lfunc_begin1:
-; LINUX64-NEXT: .cfi_startproc
-; LINUX64-NEXT: # %bb.0: # %entry
-; LINUX64-NEXT: andi. r3, r3, 1
-; LINUX64-NEXT: bclr 4, gt, 0
-; LINUX64-NEXT: # %bb.1: # %for.body.preheader
-; LINUX64-NEXT: xxlxor f0, f0, f0
-; LINUX64-NEXT: xxlxor vs1, vs1, vs1
-; LINUX64-NEXT: xxlxor f2, f2, f2
-; LINUX64-NEXT: .p2align 4
-; LINUX64-NEXT: .LBB1_2: # %for.body
-; LINUX64-NEXT: # =>This Inner Loop Header: Depth=1
-; LINUX64-NEXT: xxmrghd vs2, vs0, vs2
-; LINUX64-NEXT: xvcvdpsp vs34, vs2
-; LINUX64-NEXT: xxspltd vs2, vs0, 0
-; LINUX64-NEXT: xvcvdpsp vs35, vs2
-; LINUX64-NEXT: xxspltiw vs2, 1170469888
-; LINUX64-NEXT: vmrgew v2, v3, v2
-; LINUX64-NEXT: xvcmpgtsp vs3, vs1, vs34
-; LINUX64-NEXT: xvmaddasp vs2, vs34, vs1
-; LINUX64-NEXT: xxland vs2, vs3, vs2
-; LINUX64-NEXT: xxsldwi vs2, vs2, vs2, 3
-; LINUX64-NEXT: xscvspdpn f2, vs2
-; LINUX64-NEXT: b .LBB1_2
-
-; CHECK32: .bar:
-; CHECK32-NEXT: # %bb.0: # %entry
-; CHECK32-NEXT: lwz r5, 0(r5)
-; CHECK32-NEXT: cmpwi r5, 0
-; CHECK32-NEXT: blelr cr0
-; CHECK32-NEXT: # %bb.1: # %for.body.preheader
-; CHECK32-NEXT: xxspltiw vs0, 1069066811
-; CHECK32-NEXT: xxspltiw vs1, 1170469888
-; CHECK32-NEXT: li r6, 0
-; CHECK32-NEXT: li r7, 0
-; CHECK32-NEXT: .align 4
-; CHECK32-NEXT: [[L2_foo:.*]]: # %for.body
-; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK32-NEXT: slwi r8, r7, 4
-; CHECK32-NEXT: addic r7, r7, 1
-; CHECK32-NEXT: addze r6, r6
-; CHECK32-NEXT: lxvx vs2, r4, r8
-; CHECK32-NEXT: xvmaddmsp vs2, vs0, vs1
-; CHECK32-NEXT: stxvx vs2, r3, r8
-; CHECK32-NEXT: xor r8, r7, r5
-; CHECK32-NEXT: or. r8, r8, r6
-; CHECK32-NEXT: bne cr0, [[L2_foo]]
-
-; CHECK32: .foo:
-; CHECK32-NEXT: # %bb.0: # %entry
-; CHECK32-NEXT: andi. r3, r3, 1
-; CHECK32-NEXT: bclr 4, gt, 0
-; CHECK32-NEXT: # %bb.1: # %for.body.preheader
-; CHECK32-NEXT: lwz r3, L..C0(r2) # %const.0
-; CHECK32-NEXT: xxlxor f1, f1, f1
-; CHECK32-NEXT: xxlxor vs0, vs0, vs0
-; CHECK32-NEXT: xscvdpspn vs35, f1
-; CHECK32-NEXT: lxv vs34, 0(r3)
-; CHECK32-NEXT: .align 4
-; CHECK32-NEXT: L..BB1_2: # %for.body
-; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK32-NEXT: xscvdpspn vs36, f1
-; CHECK32-NEXT: xxspltiw vs1, 1170469888
-; CHECK32-NEXT: vperm v4, v4, v3, v2
-; CHECK32-NEXT: xvcmpgtsp vs2, vs0, vs36
-; CHECK32-NEXT: xvmaddasp vs1, vs36, vs0
-; CHECK32-NEXT: xxland vs1, vs2, vs1
-; CHECK32-NEXT: xscvspdpn f1, vs1
-; CHECK32-NEXT: b L..BB1_2
diff --git a/llvm/test/CodeGen/X86/memfold-mov32r0.ll b/llvm/test/CodeGen/X86/memfold-mov32r0.ll
deleted file mode 100644
index f7cbf6c33c94c..0000000000000
--- a/llvm/test/CodeGen/X86/memfold-mov32r0.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
-
-; CHECK: movq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-define i32 @test() nounwind {
-entry:
- %div = udiv i256 0, 0
- store i256 %div, ptr null, align 16
- ret i32 0
-}
diff --git a/llvm/test/CodeGen/X86/memfold-mov32r0.mir b/llvm/test/CodeGen/X86/memfold-mov32r0.mir
new file mode 100644
index 0000000000000..729b8098a0266
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memfold-mov32r0.mir
@@ -0,0 +1,143 @@
+# RUN: llc -start-after=early-machinelicm -mtriple=x86_64 %s -o - | FileCheck %s
+
+---
+name: test
+tracksRegLiveness: true
+isSSA: true
+body: |
+ ; CHECK: movq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+ bb.0:
+ successors: %bb.5(0x40000000), %bb.4(0x40000000)
+
+ %80:gr32 = MOV32r0 implicit-def dead $eflags
+ %15:gr64 = SUBREG_TO_REG 0, killed %80, %subreg.sub_32bit
+ %14:gr64 = COPY %15
+ %13:gr64 = COPY %15
+ %12:gr64 = COPY %15
+ %81:gr8 = MOV8ri 1
+ TEST8rr %81, %81, implicit-def $eflags
+ JCC_1 %bb.5, 5, implicit $eflags
+ JMP_1 %bb.4
+
+ bb.1:
+ successors: %bb.5(0x80000000)
+
+ %16:gr64 = PHI %82, %bb.4, %48, %bb.2
+ %17:gr64 = PHI %83, %bb.4, %49, %bb.2
+ %18:gr64 = PHI %84, %bb.4, %50, %bb.2
+ %19:gr64 = PHI %85, %bb.4, %51, %bb.2
+ %20:gr64 = PHI %72, %bb.4, %44, %bb.2
+ %21:gr64 = PHI %73, %bb.4, %45, %bb.2
+ %22:gr64 = PHI %74, %bb.4, %46, %bb.2
+ %23:gr64 = PHI %75, %bb.4, %47, %bb.2
+ %105:gr64 = SHLD64rri8 %23, %22, 1, implicit-def dead $eflags
+ %106:gr64 = SHLD64rri8 %22, %21, 1, implicit-def dead $eflags
+ %107:gr64 = SHLD64rri8 %21, %20, 1, implicit-def dead $eflags
+ %108:gr64 = ADD64rr %20, %20, implicit-def dead $eflags
+ %24:gr64 = OR64rr %16, killed %108, implicit-def dead $eflags
+ %25:gr64 = OR64rr %17, killed %107, implicit-def dead $eflags
+ %26:gr64 = OR64rr %18, killed %106, implicit-def dead $eflags
+ %27:gr64 = OR64rr %19, killed %105, implicit-def dead $eflags
+ JMP_1 %bb.5
+
+ bb.2:
+ successors: %bb.1(0x04000000), %bb.2(0x7c000000)
+
+ %28:gr64 = PHI %88, %bb.3, %48, %bb.2
+ %29:gr64 = PHI %89, %bb.3, %49, %bb.2
+ %30:gr64 = PHI %90, %bb.3, %50, %bb.2
+ %31:gr64 = PHI %91, %bb.3, %51, %bb.2
+ %32:gr64 = PHI %68, %bb.3, %56, %bb.2
+ %33:gr64 = PHI %69, %bb.3, %57, %bb.2
+ %34:gr64 = PHI %70, %bb.3, %58, %bb.2
+ %35:gr64 = PHI %71, %bb.3, %59, %bb.2
+ %36:gr64 = PHI %60, %bb.3, %52, %bb.2
+ %37:gr64 = PHI %61, %bb.3, %53, %bb.2
+ %38:gr64 = PHI %62, %bb.3, %54, %bb.2
+ %39:gr64 = PHI %63, %bb.3, %55, %bb.2
+ %40:gr64 = PHI %72, %bb.3, %44, %bb.2
+ %41:gr64 = PHI %73, %bb.3, %45, %bb.2
+ %42:gr64 = PHI %74, %bb.3, %46, %bb.2
+ %43:gr64 = PHI %75, %bb.3, %47, %bb.2
+ %55:gr64 = SHLD64rri8 %39, %38, 1, implicit-def dead $eflags
+ %54:gr64 = SHLD64rri8 %38, %37, 1, implicit-def dead $eflags
+ %53:gr64 = SHLD64rri8 %37, %36, 1, implicit-def dead $eflags
+ %52:gr64 = SHLD64rri8 %36, %43, 1, implicit-def dead $eflags
+ %93:gr64 = SHLD64rri8 %43, %42, 1, implicit-def dead $eflags
+ %94:gr64 = SHLD64rri8 %42, %41, 1, implicit-def dead $eflags
+ %95:gr64 = SHLD64rri8 %41, %40, 1, implicit-def dead $eflags
+ %96:gr64 = ADD64rr %40, %40, implicit-def dead $eflags
+ %44:gr64 = OR64rr %28, killed %96, implicit-def dead $eflags
+ %45:gr64 = OR64rr %29, killed %95, implicit-def dead $eflags
+ %46:gr64 = OR64rr %30, killed %94, implicit-def dead $eflags
+ %47:gr64 = OR64rr %31, killed %93, implicit-def dead $eflags
+ %97:gr64 = SUB64rr %64, %52, implicit-def $eflags
+ %98:gr64 = SBB64rr %65, %53, implicit-def $eflags, implicit $eflags
+ %99:gr64 = SBB64rr %66, %54, implicit-def $eflags, implicit $eflags
+ %100:gr64 = SBB64rr %67, %55, implicit-def dead $eflags, implicit $eflags
+ %48:gr64 = SHR64ri %100, 63, implicit-def dead $eflags
+ %49:gr64 = SUBREG_TO_REG 0, %92, %subreg.sub_32bit
+ %51:gr64 = COPY %49
+ %50:gr64 = COPY %49
+ %56:gr64 = ADD64ri32 %32, -1, implicit-def $eflags
+ %57:gr64 = ADC64ri32 %33, -1, implicit-def $eflags, implicit $eflags
+ %58:gr64 = ADC64ri32 %34, -1, implicit-def $eflags, implicit $eflags
+ %59:gr64 = ADC64ri32 %35, -1, implicit-def dead $eflags, implicit $eflags
+ %102:gr64 = OR64rr %57, %59, implicit-def dead $eflags
+ %103:gr64 = OR64rr %56, %58, implicit-def dead $eflags
+ %104:gr64 = OR64rr %103, killed %102, implicit-def $eflags
+ JCC_1 %bb.1, 4, implicit $eflags
+ JMP_1 %bb.2
+
+ bb.3:
+ successors: %bb.2(0x80000000)
+
+ %92:gr32 = MOV32r0 implicit-def dead $eflags
+ %62:gr64 = SUBREG_TO_REG 0, %92, %subreg.sub_32bit
+ %63:gr64 = COPY %62
+ %61:gr64 = COPY %62
+ %60:gr64 = COPY %62
+ %66:gr64 = MOV64ri32 -1
+ %67:gr64 = COPY %66
+ %65:gr64 = COPY %66
+ %64:gr64 = COPY %66
+ %91:gr64 = COPY %62
+ %90:gr64 = COPY %62
+ %89:gr64 = COPY %62
+ %88:gr64 = COPY %62
+ JMP_1 %bb.2
+
+ bb.4:
+ successors: %bb.1(0x30000000), %bb.3(0x50000000)
+
+ %68:gr64 = MOV32ri64 1
+ %86:gr32 = MOV32r0 implicit-def dead $eflags
+ %74:gr64 = SUBREG_TO_REG 0, %86, %subreg.sub_32bit
+ %71:gr64 = COPY %74
+ %70:gr64 = COPY %74
+ %69:gr64 = COPY %74
+ %75:gr64 = COPY %74
+ %73:gr64 = COPY %74
+ %72:gr64 = COPY %74
+ %85:gr64 = COPY %74
+ %84:gr64 = COPY %74
+ %83:gr64 = COPY %74
+ %82:gr64 = COPY %74
+ %87:gr8 = COPY %86.sub_8bit
+ TEST8rr %87, %87, implicit-def $eflags
+ JCC_1 %bb.1, 5, implicit $eflags
+ JMP_1 %bb.3
+
+ bb.5:
+ %76:gr64 = PHI %12, %bb.0, %24, %bb.1
+ %77:gr64 = PHI %13, %bb.0, %25, %bb.1
+ %78:gr64 = PHI %14, %bb.0, %26, %bb.1
+ %79:gr64 = PHI %15, %bb.0, %27, %bb.1
+ MOV64mr $noreg, 1, $noreg, 0, $noreg, %76 :: (store (s64) into `ptr null`, align 16)
+ MOV64mr $noreg, 1, $noreg, 8, $noreg, %77 :: (store (s64) into `ptr null` + 8, basealign 16)
+ MOV64mr $noreg, 1, $noreg, 16, $noreg, %78 :: (store (s64) into `ptr null` + 16, align 16)
+ MOV64mr $noreg, 1, $noreg, 24, $noreg, %79 :: (store (s64) into `ptr null` + 24, basealign 16)
+ %109:gr32 = MOV32r0 implicit-def dead $eflags
+ $eax = COPY %109
+ RET 0, $eax
+...
diff --git a/llvm/test/CodeGen/X86/pr57673.ll b/llvm/test/CodeGen/X86/pr57673.ll
index c3710a7fc462c..9d932cf376553 100644
--- a/llvm/test/CodeGen/X86/pr57673.ll
+++ b/llvm/test/CodeGen/X86/pr57673.ll
@@ -20,16 +20,16 @@ define void @foo() {
; NORMAL: bb.0.bb_entry:
; NORMAL-NEXT: successors: %bb.1(0x80000000)
; NORMAL-NEXT: {{ $}}
- ; NORMAL-NEXT: [[MOV32r0_:%[0-9]+]]:gr8 = IMPLICIT_DEF
- ; NORMAL-NEXT: [[COPY:%[0-9]+]]:gr8 = IMPLICIT_DEF
- ; NORMAL-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
+ ; NORMAL-NEXT: [[DEF:%[0-9]+]]:gr8 = IMPLICIT_DEF
+ ; NORMAL-NEXT: [[DEF1:%[0-9]+]]:gr8 = IMPLICIT_DEF
+ ; NORMAL-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
; NORMAL-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.1.i, 1, $noreg, 0, $noreg
- ; NORMAL-NEXT: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF
+ ; NORMAL-NEXT: [[DEF2:%[0-9]+]]:gr64 = IMPLICIT_DEF
; NORMAL-NEXT: {{ $}}
; NORMAL-NEXT: bb.1.bb_8:
; NORMAL-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; NORMAL-NEXT: {{ $}}
- ; NORMAL-NEXT: TEST8rr [[MOV32r0_]], [[COPY]], implicit-def $eflags
+ ; NORMAL-NEXT: TEST8rr [[DEF]], [[DEF1]], implicit-def $eflags
; NORMAL-NEXT: JCC_1 %bb.3, 5, implicit $eflags
; NORMAL-NEXT: JMP_1 %bb.2
; NORMAL-NEXT: {{ $}}
@@ -38,7 +38,7 @@ define void @foo() {
; NORMAL-NEXT: {{ $}}
; NORMAL-NEXT: [[MOVUPSrm:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i4, align 8)
; NORMAL-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm]] :: (store (s128) into `ptr null`, align 8)
- ; NORMAL-NEXT: DBG_VALUE_LIST !3, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 40, DW_OP_stack_value), %stack.1.i, %stack.1.i, debug-location !8
+ ; NORMAL-NEXT: DBG_VALUE_LIST !3, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 40, DW_OP_stack_value), %stack.1.i, %stack.1.i, debug-location !8
; NORMAL-NEXT: [[MOVUPSrm1:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i6, align 8)
; NORMAL-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm1]] :: (store (s128) into `ptr null`, align 8)
; NORMAL-NEXT: {{ $}}
@@ -46,13 +46,13 @@ define void @foo() {
; NORMAL-NEXT: successors: %bb.1(0x80000000)
; NORMAL-NEXT: {{ $}}
; NORMAL-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; NORMAL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, [[MOV32r0_1]], %subreg.sub_32bit
+ ; NORMAL-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, [[MOV32r0_]], %subreg.sub_32bit
; NORMAL-NEXT: $rdi = COPY [[SUBREG_TO_REG]]
; NORMAL-NEXT: $rsi = COPY [[SUBREG_TO_REG]]
; NORMAL-NEXT: $rdx = COPY [[SUBREG_TO_REG]]
- ; NORMAL-NEXT: $ecx = COPY [[MOV32r0_1]]
+ ; NORMAL-NEXT: $ecx = COPY [[MOV32r0_]]
; NORMAL-NEXT: $r8 = COPY [[LEA64r]]
- ; NORMAL-NEXT: CALL64r [[DEF]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $ecx, implicit $r8, implicit-def $rsp, implicit-def $ssp
+ ; NORMAL-NEXT: CALL64r [[DEF2]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $ecx, implicit $r8, implicit-def $rsp, implicit-def $ssp
; NORMAL-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; NORMAL-NEXT: JMP_1 %bb.1
;
@@ -60,16 +60,16 @@ define void @foo() {
; INSTRREF: bb.0.bb_entry:
; INSTRREF-NEXT: successors: %bb.1(0x80000000)
; INSTRREF-NEXT: {{ $}}
- ; INSTRREF-NEXT: [[MOV32r0_:%[0-9]+]]:gr8 = IMPLICIT_DEF
- ; INSTRREF-NEXT: [[COPY:%[0-9]+]]:gr8 = IMPLICIT_DEF
- ; INSTRREF-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
+ ; INSTRREF-NEXT: [[DEF:%[0-9]+]]:gr8 = IMPLICIT_DEF
+ ; INSTRREF-NEXT: [[DEF1:%[0-9]+]]:gr8 = IMPLICIT_DEF
+ ; INSTRREF-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
; INSTRREF-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.1.i, 1, $noreg, 0, $noreg
- ; INSTRREF-NEXT: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF
+ ; INSTRREF-NEXT: [[DEF2:%[0-9]+]]:gr64 = IMPLICIT_DEF
; INSTRREF-NEXT: {{ $}}
; INSTRREF-NEXT: bb.1.bb_8:
; INSTRREF-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; INSTRREF-NEXT: {{ $}}
- ; INSTRREF-NEXT: TEST8rr [[MOV32r0_]], [[COPY]], implicit-def $eflags
+ ; INSTRREF-NEXT: TEST8rr [[DEF]], [[DEF1]], implicit-def $eflags
; INSTRREF-NEXT: JCC_1 %bb.3, 5, implicit $eflags
; INSTRREF-NEXT: JMP_1 %bb.2
; INSTRREF-NEXT: {{ $}}
@@ -78,7 +78,7 @@ define void @foo() {
; INSTRREF-NEXT: {{ $}}
; INSTRREF-NEXT: [[MOVUPSrm:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i4, align 8)
; INSTRREF-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm]] :: (store (s128) into `ptr null`, align 8)
- ; INSTRREF-NEXT: DBG_VALUE_LIST !3, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 40, DW_OP_stack_value), %stack.1.i, %stack.1.i, debug-location !8
+ ; INSTRREF-NEXT: DBG_VALUE_LIST !3, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 40, DW_OP_stack_value), %stack.1.i, %stack.1.i, debug-location !8
; INSTRREF-NEXT: [[MOVUPSrm1:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i6, align 8)
; INSTRREF-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm1]] :: (store (s128) into `ptr null`, align 8)
; INSTRREF-NEXT: {{ $}}
@@ -86,13 +86,13 @@ define void @foo() {
; INSTRREF-NEXT: successors: %bb.1(0x80000000)
; INSTRREF-NEXT: {{ $}}
; INSTRREF-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; INSTRREF-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, [[MOV32r0_1]], %subreg.sub_32bit
+ ; INSTRREF-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, [[MOV32r0_]], %subreg.sub_32bit
; INSTRREF-NEXT: $rdi = COPY [[SUBREG_TO_REG]]
; INSTRREF-NEXT: $rsi = COPY [[SUBREG_TO_REG]]
; INSTRREF-NEXT: $rdx = COPY [[SUBREG_TO_REG]]
- ; INSTRREF-NEXT: $ecx = COPY [[MOV32r0_1]]
+ ; INSTRREF-NEXT: $ecx = COPY [[MOV32r0_]]
; INSTRREF-NEXT: $r8 = COPY [[LEA64r]]
- ; INSTRREF-NEXT: CALL64r [[DEF]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $ecx, implicit $r8, implicit-def $rsp, implicit-def $ssp
+ ; INSTRREF-NEXT: CALL64r [[DEF2]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $ecx, implicit $r8, implicit-def $rsp, implicit-def $ssp
; INSTRREF-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; INSTRREF-NEXT: JMP_1 %bb.1
bb_entry: