[llvm] 4af5b23 - [X86FixupSetCC] Remember the preceding eflags defining instruction while we're scanning the basic block instead of looking back for it.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 25 10:26:32 PST 2019
Author: Craig Topper
Date: 2019-12-25T10:26:13-08:00
New Revision: 4af5b23db308c89edeb9fdc6dfbe7e6457b22f1d
URL: https://github.com/llvm/llvm-project/commit/4af5b23db308c89edeb9fdc6dfbe7e6457b22f1d
DIFF: https://github.com/llvm/llvm-project/commit/4af5b23db308c89edeb9fdc6dfbe7e6457b22f1d.diff
LOG: [X86FixupSetCC] Remember the preceding eflags defining instruction while we're scanning the basic block instead of looking back for it.
Summary:
We're already scanning forward through the basic block, so we might as
well just remember the EFLAGS defs as we go instead of doing a bounded
search backwards later.
Based on a comment in D71841.
Reviewers: RKSimon, spatel, uweigand
Reviewed By: uweigand
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71865
Added:
Modified:
llvm/lib/Target/X86/X86FixupSetCC.cpp
llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 8ecdce4fd0d3..924f429fc138 100644
--- a/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -43,14 +43,6 @@ class X86FixupSetCCPass : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- // Find the preceding instruction that imp-defs eflags.
- MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB,
- MachineBasicBlock::reverse_iterator MI);
-
- // Return true if this is the opcode of a SetCC instruction with a register
- // output.
- bool isSetCCr(unsigned Opode);
-
MachineRegisterInfo *MRI = nullptr;
const X86InstrInfo *TII = nullptr;
@@ -64,22 +56,6 @@ char X86FixupSetCCPass::ID = 0;
FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
-// We expect the instruction *immediately* before the setcc to imp-def
-// EFLAGS (because of scheduling glue). To make this less brittle w.r.t
-// scheduling, look backwards until we hit the beginning of the
-// basic-block, or a small bound (to avoid quadratic behavior).
-MachineInstr *
-X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
- MachineBasicBlock::reverse_iterator MI) {
- // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator?
- auto MBBStart = MBB->rend();
- for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
- if (MI->definesRegister(X86::EFLAGS))
- return &*MI;
-
- return nullptr;
-}
-
bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
MRI = &MF.getRegInfo();
@@ -88,7 +64,12 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr*, 4> ToErase;
for (auto &MBB : MF) {
+ MachineInstr *FlagsDefMI = nullptr;
for (auto &MI : MBB) {
+ // Remember the most recent preceding eflags defining instruction.
+ if (MI.definesRegister(X86::EFLAGS))
+ FlagsDefMI = &MI;
+
// Find a setcc that is used by a zext.
// This doesn't have to be the only use, the transformation is safe
// regardless.
@@ -103,9 +84,6 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
if (!ZExt)
continue;
- // Find the preceding instruction that imp-defs eflags.
- MachineInstr *FlagsDefMI = findFlagsImpDef(
- MI.getParent(), MachineBasicBlock::reverse_iterator(&MI));
if (!FlagsDefMI)
continue;
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
index 6310ae3e99e5..2a51ac487dda 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -194,7 +194,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: movl %eax, %edi
+; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
@@ -210,8 +210,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: movl %eax, %esi
-; AVX512VL-32-NEXT: xorl %edx, %edx
+; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
@@ -221,12 +220,14 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT: setae %dl
-; AVX512VL-32-NEXT: shll $31, %edx
-; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512VL-32-NEXT: movl $0, %eax
+; AVX512VL-32-NEXT: setae %al
+; AVX512VL-32-NEXT: shll $31, %eax
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX512VL-32-NEXT: xorl %ecx, %ecx
+; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
@@ -236,13 +237,13 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm4, (%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
-; AVX512VL-32-NEXT: setae %cl
-; AVX512VL-32-NEXT: shll $31, %ecx
-; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512VL-32-NEXT: setae %dl
+; AVX512VL-32-NEXT: shll $31, %edx
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
-; AVX512VL-32-NEXT: setb %bl
-; AVX512VL-32-NEXT: kmovw %ebx, %k1
+; AVX512VL-32-NEXT: setb %cl
+; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm4, %xmm4 {%k1}
; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
@@ -252,6 +253,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
@@ -263,21 +265,20 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
-; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vpinsrd $3, %edi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
+; AVX512VL-32-NEXT: vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
-; AVX512VL-32-NEXT: setae %al
-; AVX512VL-32-NEXT: movzbl %al, %eax
-; AVX512VL-32-NEXT: shll $31, %eax
-; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: setae %cl
+; AVX512VL-32-NEXT: shll $31, %ecx
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
@@ -498,7 +499,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: movl %eax, %edi
+; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
@@ -514,8 +515,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: movl %eax, %esi
-; AVX512VL-32-NEXT: xorl %edx, %edx
+; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
@@ -525,11 +525,13 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT: setae %dl
-; AVX512VL-32-NEXT: shll $31, %edx
-; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512VL-32-NEXT: movl $0, %eax
+; AVX512VL-32-NEXT: setae %al
+; AVX512VL-32-NEXT: shll $31, %eax
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
-; AVX512VL-32-NEXT: xorl %ecx, %ecx
+; AVX512VL-32-NEXT: xorl %edx, %edx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
@@ -539,14 +541,14 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm4, (%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
-; AVX512VL-32-NEXT: setae %cl
-; AVX512VL-32-NEXT: shll $31, %ecx
-; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512VL-32-NEXT: setae %dl
+; AVX512VL-32-NEXT: shll $31, %edx
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512VL-32-NEXT: xorl %eax, %eax
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
-; AVX512VL-32-NEXT: setb %bl
-; AVX512VL-32-NEXT: kmovw %ebx, %k1
+; AVX512VL-32-NEXT: setb %cl
+; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm4, %xmm4 {%k1}
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
@@ -556,6 +558,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: xorl %ecx, %ecx
; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
@@ -567,21 +570,20 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
-; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vpinsrd $3, %edi, %xmm1, %xmm1
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
+; AVX512VL-32-NEXT: vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
-; AVX512VL-32-NEXT: setae %al
-; AVX512VL-32-NEXT: movzbl %al, %eax
-; AVX512VL-32-NEXT: shll $31, %eax
-; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: setae %cl
+; AVX512VL-32-NEXT: shll $31, %ecx
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
More information about the llvm-commits mailing list