[llvm] 8fdafb7 - Insert wait instruction after X87 instructions which could raise
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 15 20:13:10 PST 2020
Author: Liu, Chen3
Date: 2020-01-16T12:12:51+08:00
New Revision: 8fdafb7dced812b2dc0af77f9668bfe23b4ffb0b
URL: https://github.com/llvm/llvm-project/commit/8fdafb7dced812b2dc0af77f9668bfe23b4ffb0b
DIFF: https://github.com/llvm/llvm-project/commit/8fdafb7dced812b2dc0af77f9668bfe23b4ffb0b.diff
LOG: Insert wait instruction after X87 instructions which could raise
floating-point exceptions.
This patch also modifies some mayRaiseFPException flags which were set in D68854.
Differential Revision: https://reviews.llvm.org/D72750
Added:
llvm/lib/Target/X86/X86InsertWait.cpp
Modified:
llvm/lib/Target/X86/CMakeLists.txt
llvm/lib/Target/X86/X86.h
llvm/lib/Target/X86/X86FloatingPoint.cpp
llvm/lib/Target/X86/X86InstrFPStack.td
llvm/lib/Target/X86/X86TargetMachine.cpp
llvm/test/CodeGen/X86/O0-pipeline.ll
llvm/test/CodeGen/X86/O3-pipeline.ll
llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
llvm/test/CodeGen/X86/fp-intrinsics.ll
llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
llvm/test/CodeGen/X86/fp-strict-scalar.ll
llvm/test/CodeGen/X86/fp128-cast-strict.ll
llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll
llvm/test/CodeGen/X86/fp80-strict-scalar.ll
llvm/test/CodeGen/X86/vec-strict-128.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 58f2292dd4cd..3f0d68c0c788 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -69,6 +69,7 @@ set(sources
X86VZeroUpper.cpp
X86WinAllocaExpander.cpp
X86WinEHState.cpp
+ X86InsertWait.cpp
)
add_llvm_target(X86CodeGen ${sources})
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 0481a40d462a..604438f83531 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -129,6 +129,10 @@ FunctionPass *createX86DiscriminateMemOpsPass();
/// This pass applies profiling information to insert cache prefetches.
FunctionPass *createX86InsertPrefetchPass();
+/// This pass inserts a wait instruction after X87 instructions which could
+/// raise FP exceptions when strict-fp is enabled.
+FunctionPass *createX86InsertX87waitPass();
+
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 13bbd6ccfce4..e6ee46957500 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -1364,6 +1364,9 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
MBB->remove(&*I++);
I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+ if (!MI.mayRaiseFPException())
+ I->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
// If both operands are killed, pop one off of the stack in addition to
// overwriting the other one.
if (KillsOp0 && KillsOp1 && Op0 != Op1) {
diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp
new file mode 100644
index 000000000000..a82d98d88b30
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -0,0 +1,151 @@
+//- X86InsertWait.cpp - Strict-Fp: Insert wait after X87 instructions ------//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 wait instructions after X87
+// instructions when strict floating-point is enabled.
+//
+// The logic to insert a wait instruction after an X87 instruction is as below:
+// 1. If the X87 instruction neither raises a float exception nor is a
+// load/store instruction, or is an x87 control instruction, don't insert wait.
+// 2. If the X87 instruction is immediately followed by an X87 instruction
+// that synchronizes X87 exceptions, don't insert wait.
+// 3. For other situations, insert wait instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-insert-wait"
+
+namespace {
+
+class WaitInsert : public MachineFunctionPass {
+public:
+ static char ID; // Identifier handed to the MachineFunctionPass constructor.
+
+ WaitInsert() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "X86 insert wait instruction";
+ }
+
+private:
+ const TargetInstrInfo *TII; // Instruction info; set in runOnMachineFunction.
+};
+
+} // namespace
+
+char WaitInsert::ID = 0;
+
+FunctionPass *llvm::createX86InsertX87waitPass() { return new WaitInsert(); }
+
+/// Return true if Reg is FPCW, FPSW, or one of the registers ST0 through ST7.
+static bool isX87Reg(unsigned Reg) {
+ return (Reg == X86::FPCW || Reg == X86::FPSW ||
+ (Reg >= X86::ST0 && Reg <= X86::ST7));
+}
+
+/// Return true if any register operand of MI is an X87 register.
+static bool isX87Instruction(MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (isX87Reg(MO.getReg()))
+ return true;
+ }
+ return false;
+}
+
+static bool isX87ControlInstruction(MachineInstr &MI) {
+ switch (MI.getOpcode()) { // Opcodes treated as x87 control instructions.
+ case X86::FNINIT:
+ case X86::FLDCW16m:
+ case X86::FNSTCW16m:
+ case X86::FNSTSW16r:
+ case X86::FNSTSWm:
+ case X86::FNCLEX:
+ case X86::FLDENVm:
+ case X86::FSTENVm:
+ case X86::FRSTORm:
+ case X86::FSAVEm:
+ case X86::FINCSTP:
+ case X86::FDECSTP:
+ case X86::FFREE:
+ case X86::FFREEP:
+ case X86::FNOP:
+ case X86::WAIT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool isX87NonWaitingControlInstruction(MachineInstr &MI) {
+ // A few special control instructions don't perform a wait operation.
+ switch (MI.getOpcode()) {
+ case X86::FNINIT:
+ case X86::FNSTSW16r:
+ case X86::FNSTSWm:
+ case X86::FNSTCW16m:
+ case X86::FNCLEX:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::StrictFP))
+ return false; // Nothing to do without the StrictFP function attribute.
+
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ TII = ST.getInstrInfo();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+ // Skip non-X87 instructions.
+ if (!isX87Instruction(*MI))
+ continue;
+ // If the instruction neither may raise an FP exception nor is a
+ // load/store instruction, or the instruction is an x87 control
+ // instruction, do not insert a wait.
+ if (!(MI->mayRaiseFPException() || MI->mayLoadOrStore()) ||
+ isX87ControlInstruction(*MI))
+ continue;
+ // If the following instruction is an X87 instruction and isn't an X87
+ // non-waiting control instruction, we can omit inserting the wait.
+ MachineBasicBlock::iterator AfterMI = std::next(MI);
+ if (AfterMI != MBB.end() && isX87Instruction(*AfterMI) &&
+ !isX87NonWaitingControlInstruction(*AfterMI))
+ continue;
+
+ BuildMI(MBB, AfterMI, MI->getDebugLoc(), TII->get(X86::WAIT));
+ LLVM_DEBUG(dbgs() << "\nInsert wait after:\t" << *MI);
+ // Skip the newly inserted wait.
+ ++MI;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index 1830262205c6..677859ef0805 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -601,6 +601,7 @@ let SchedRW = [WriteMove], Uses = [FPCW] in {
def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">;
def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">;
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">;
+let mayRaiseFPException = 0 in
def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
}
@@ -620,13 +621,13 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
-let SchedRW = [WriteFLD0], Uses = [FPCW] in
+let SchedRW = [WriteFLD0], Uses = [FPCW], mayRaiseFPException = 0 in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
-let SchedRW = [WriteFLD1], Uses = [FPCW] in
+let SchedRW = [WriteFLD1], Uses = [FPCW], mayRaiseFPException = 0 in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
-let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW], mayRaiseFPException = 0 in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 8c696e9adbed..22b4e2805a5e 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -519,6 +519,7 @@ void X86PassConfig::addPreEmitPass() {
}
addPass(createX86DiscriminateMemOpsPass());
addPass(createX86InsertPrefetchPass());
+ addPass(createX86InsertX87waitPass());
}
void X86PassConfig::addPreEmitPass2() {
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 33ecad677a63..5dfe02147e13 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -65,6 +65,7 @@
; CHECK-NEXT: X86 vzeroupper inserter
; CHECK-NEXT: X86 Discriminate Memory Operands
; CHECK-NEXT: X86 Insert Cache Prefetches
+; CHECK-NEXT: X86 insert wait instruction
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
index 575b704b8b4c..3d96753b553f 100644
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -174,6 +174,7 @@
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible
; CHECK-NEXT: X86 Discriminate Memory Operands
; CHECK-NEXT: X86 Insert Cache Prefetches
+; CHECK-NEXT: X86 insert wait instruction
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll b/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
index 9c408c70cfbf..efa86e241bad 100644
--- a/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
+++ b/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
@@ -6,6 +6,7 @@ define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: flds -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
entry:
%ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(
@@ -19,6 +20,7 @@ define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstps -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
%trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80(
@@ -33,6 +35,7 @@ define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
entry:
%ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(
@@ -46,6 +49,7 @@ define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
%trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80(
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index e00248b22df8..27f198168e38 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -19,6 +19,7 @@ define double @f1() #0 {
; X87: # %bb.0: # %entry
; X87-NEXT: fld1
; X87-NEXT: fdivs {{\.LCPI.*}}
+; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f1:
@@ -29,6 +30,7 @@ define double @f1() #0 {
; X86-SSE-NEXT: divsd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -66,6 +68,7 @@ define double @f2(double %a) #0 {
; X87: # %bb.0: # %entry
; X87-NEXT: fldz
; X87-NEXT: fsubrl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f2:
@@ -77,6 +80,7 @@ define double @f2(double %a) #0 {
; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -119,6 +123,7 @@ define double @f3(double %a, double %b) #0 {
; X87-NEXT: fsubl {{[0-9]+}}(%esp)
; X87-NEXT: fmull {{[0-9]+}}(%esp)
; X87-NEXT: fsubrp %st, %st(1)
+; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f3:
@@ -132,6 +137,7 @@ define double @f3(double %a, double %b) #0 {
; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -185,11 +191,13 @@ define double @f4(i32 %n, double %a) #0 {
; X87-LABEL: f4:
; X87: # %bb.0: # %entry
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X87-NEXT: jle .LBB3_2
; X87-NEXT: # %bb.1: # %if.then
; X87-NEXT: fld1
; X87-NEXT: faddp %st, %st(1)
+; X87-NEXT: wait
; X87-NEXT: .LBB3_2: # %if.end
; X87-NEXT: retl
;
@@ -205,6 +213,7 @@ define double @f4(i32 %n, double %a) #0 {
; X86-SSE-NEXT: .LBB3_2: # %if.end
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -248,6 +257,7 @@ define double @f5() #0 {
; X87: # %bb.0: # %entry
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fsqrt
+; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f5:
@@ -258,6 +268,7 @@ define double @f5() #0 {
; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -290,6 +301,7 @@ define double @f6() #0 {
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll pow
; X87-NEXT: addl $28, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -345,6 +357,7 @@ define double @f7() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: movl $3, {{[0-9]+}}(%esp)
; X87-NEXT: calll __powidf2
; X87-NEXT: addl $12, %esp
@@ -400,6 +413,7 @@ define double @f8() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll sin
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -450,6 +464,7 @@ define double @f9() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll cos
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -500,6 +515,7 @@ define double @f10() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll exp
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -550,6 +566,7 @@ define double @f11() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll exp2
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -600,6 +617,7 @@ define double @f12() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll log
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -650,6 +668,7 @@ define double @f13() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll log10
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -700,6 +719,7 @@ define double @f14() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll log2
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -750,6 +770,7 @@ define double @f15() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll rint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -797,6 +818,7 @@ define double @f16() #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll nearbyint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -843,6 +865,7 @@ define double @f19() #0 {
; X87-NEXT: .cfi_def_cfa_offset 32
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: movl $1072693248, {{[0-9]+}}(%esp) # imm = 0x3FF00000
; X87-NEXT: movl $0, (%esp)
; X87-NEXT: calll fmod
@@ -904,6 +927,7 @@ define i8 @f20s8(double %x) #0 {
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -950,6 +974,7 @@ define i16 @f20s16(double %x) #0 {
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -994,6 +1019,7 @@ define i32 @f20s(double %x) #0 {
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: fnstcw (%esp)
; X87-NEXT: movzwl (%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1036,6 +1062,7 @@ define i64 @f20s64(double %x) #0 {
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1056,6 +1083,7 @@ define i64 @f20s64(double %x) #0 {
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1096,6 +1124,7 @@ define i128 @f20s128(double %x) nounwind strictfp {
; X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: calll __fixdfti
@@ -1170,6 +1199,7 @@ define i8 @f20u8(double %x) #0 {
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1215,6 +1245,7 @@ define i16 @f20u16(double %x) #0 {
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: fnstcw (%esp)
; X87-NEXT: movzwl (%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1262,6 +1293,7 @@ define i32 @f20u(double %x) #0 {
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1325,14 +1357,17 @@ define i64 @f20u64(double %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: flds {{\.LCPI.*}}
+; X87-NEXT: wait
; X87-NEXT: xorl %edx, %edx
; X87-NEXT: fcomi %st(1), %st
+; X87-NEXT: wait
; X87-NEXT: setbe %dl
; X87-NEXT: fldz
; X87-NEXT: fxch %st(1)
; X87-NEXT: fcmovnbe %st(1), %st
; X87-NEXT: fstp %st(1)
; X87-NEXT: fsubrp %st, %st(1)
+; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1363,6 +1398,7 @@ define i64 @f20u64(double %x) #0 {
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: setbe %al
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -1435,6 +1471,7 @@ define i128 @f20u128(double %x) nounwind strictfp {
; X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: calll __fixunsdfti
@@ -1509,6 +1546,7 @@ define float @f21() #0 {
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstps (%esp)
; X87-NEXT: flds (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -1521,6 +1559,7 @@ define float @f21() #0 {
; X86-SSE-NEXT: cvtsd2ss %xmm0, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -1548,6 +1587,7 @@ define double @f22(float %x) #0 {
; X87-LABEL: f22:
; X87: # %bb.0: # %entry
; X87-NEXT: flds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f22:
@@ -1558,6 +1598,7 @@ define double @f22(float %x) #0 {
; X86-SSE-NEXT: cvtss2sd %xmm0, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -1584,6 +1625,7 @@ define i32 @f23(double %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll lrint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -1631,6 +1673,7 @@ define i32 @f24(float %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll lrintf
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -1678,6 +1721,7 @@ define i64 @f25(double %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll llrint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -1772,6 +1816,7 @@ define i32 @f27(double %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll lround
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -1818,6 +1863,7 @@ define i32 @f28(float %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll lroundf
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -1864,6 +1910,7 @@ define i64 @f29(double %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll llround
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -1910,6 +1957,7 @@ define i64 @f30(float %x) #0 {
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll llroundf
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
@@ -1960,6 +2008,7 @@ define double @sifdb(i8 %x) #0 {
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -1972,6 +2021,7 @@ define double @sifdb(i8 %x) #0 {
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2002,6 +2052,7 @@ define double @sifdw(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2014,6 +2065,7 @@ define double @sifdw(i16 %x) #0 {
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2044,6 +2096,7 @@ define double @sifdi(i32 %x) #0 {
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2055,6 +2108,7 @@ define double @sifdi(i32 %x) #0 {
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2083,6 +2137,7 @@ define float @siffb(i8 %x) #0 {
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2095,6 +2150,7 @@ define float @siffb(i8 %x) #0 {
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2125,6 +2181,7 @@ define float @siffw(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2137,6 +2194,7 @@ define float @siffw(i16 %x) #0 {
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2167,6 +2225,7 @@ define float @siffi(i32 %x) #0 {
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2178,6 +2237,7 @@ define float @siffi(i32 %x) #0 {
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2202,6 +2262,7 @@ define double @sifdl(i64 %x) #0 {
; X87-LABEL: sifdl:
; X87: # %bb.0: # %entry
; X87-NEXT: fildll {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdl:
@@ -2211,6 +2272,7 @@ define double @sifdl(i64 %x) #0 {
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstpl (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2235,6 +2297,7 @@ define float @siffl(i64 %x) #0 {
; X87-LABEL: siffl:
; X87: # %bb.0: # %entry
; X87-NEXT: fildll {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffl:
@@ -2244,6 +2307,7 @@ define float @siffl(i64 %x) #0 {
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstps (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2275,6 +2339,7 @@ define double @uifdb(i8 %x) #0 {
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2287,6 +2352,7 @@ define double @uifdb(i8 %x) #0 {
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2317,6 +2383,7 @@ define double @uifdw(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2329,6 +2396,7 @@ define double @uifdw(i16 %x) #0 {
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2360,6 +2428,7 @@ define double @uifdi(i32 %x) #0 {
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
+; X87-NEXT: wait
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2374,6 +2443,7 @@ define double @uifdi(i32 %x) #0 {
; X86-SSE-NEXT: subsd %xmm0, %xmm1
; X86-SSE-NEXT: movsd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2415,6 +2485,7 @@ define double @uifdl(i64 %x) #0 {
; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2431,6 +2502,7 @@ define double @uifdl(i64 %x) #0 {
; X86-SSE-NEXT: addpd %xmm0, %xmm1
; X86-SSE-NEXT: movlpd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2473,6 +2545,7 @@ define float @uiffb(i8 %x) #0 {
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2485,6 +2558,7 @@ define float @uiffb(i8 %x) #0 {
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2515,6 +2589,7 @@ define float @uiffw(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2527,6 +2602,7 @@ define float @uiffw(i16 %x) #0 {
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2558,6 +2634,7 @@ define float @uiffi(i32 %x) #0 {
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
+; X87-NEXT: wait
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2574,6 +2651,7 @@ define float @uiffi(i32 %x) #0 {
; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
@@ -2615,6 +2693,7 @@ define float @uiffl(i64 %x) #0 {
; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -2630,9 +2709,11 @@ define float @uiffl(i64 %x) #0 {
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
index 45d7d513e6fb..1bc308bef8cc 100644
--- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
@@ -7,9 +7,11 @@ define float @ceil(float %x) #0 {
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _ceil
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.ceil.f32(float %x, metadata !"fpexcept.strict") #0
@@ -22,9 +24,11 @@ define float @cos(float %x) #0 {
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _cos
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.cos.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -37,9 +41,11 @@ define float @exp(float %x) #0 {
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _exp
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.exp.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -52,9 +58,11 @@ define float @floor(float %x) #0 {
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _floor
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.floor.f32(float %x, metadata !"fpexcept.strict") #0
@@ -70,9 +78,11 @@ define float @frem(float %x, float %y) #0 {
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstpl {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _fmod
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $20, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -85,9 +95,11 @@ define float @log(float %x) #0 {
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _log
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.log.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -100,9 +112,11 @@ define float @log10(float %x) #0 {
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _log10
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.log10.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -118,9 +132,11 @@ define float @pow(float %x, float %y) #0 {
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstpl {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _pow
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $20, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.pow.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -133,9 +149,11 @@ define float @sin(float %x) #0 {
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: calll _sin
; CHECK-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.sin.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
index f27076028198..115e16583bf4 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
@@ -52,6 +52,7 @@ define i32 @test_f32_oeq_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -71,6 +72,7 @@ define i32 @test_f32_oeq_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -124,6 +126,7 @@ define i32 @test_f32_ogt_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -143,6 +146,7 @@ define i32 @test_f32_ogt_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -195,6 +199,7 @@ define i32 @test_f32_oge_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -214,6 +219,7 @@ define i32 @test_f32_oge_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -266,6 +272,7 @@ define i32 @test_f32_olt_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -285,6 +292,7 @@ define i32 @test_f32_olt_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -337,6 +345,7 @@ define i32 @test_f32_ole_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -356,6 +365,7 @@ define i32 @test_f32_ole_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -408,6 +418,7 @@ define i32 @test_f32_one_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -427,6 +438,7 @@ define i32 @test_f32_one_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -479,6 +491,7 @@ define i32 @test_f32_ord_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -498,6 +511,7 @@ define i32 @test_f32_ord_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnpl %eax, %ecx
@@ -550,6 +564,7 @@ define i32 @test_f32_ueq_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -569,6 +584,7 @@ define i32 @test_f32_ueq_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovel %eax, %ecx
@@ -621,6 +637,7 @@ define i32 @test_f32_ugt_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -640,6 +657,7 @@ define i32 @test_f32_ugt_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -692,6 +710,7 @@ define i32 @test_f32_uge_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -711,6 +730,7 @@ define i32 @test_f32_uge_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -763,6 +783,7 @@ define i32 @test_f32_ult_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -782,6 +803,7 @@ define i32 @test_f32_ult_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -834,6 +856,7 @@ define i32 @test_f32_ule_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -853,6 +876,7 @@ define i32 @test_f32_ule_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -909,6 +933,7 @@ define i32 @test_f32_une_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -928,6 +953,7 @@ define i32 @test_f32_une_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -981,6 +1007,7 @@ define i32 @test_f32_uno_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1000,6 +1027,7 @@ define i32 @test_f32_uno_q(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovpl %eax, %ecx
@@ -1056,6 +1084,7 @@ define i32 @test_f64_oeq_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1075,6 +1104,7 @@ define i32 @test_f64_oeq_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -1128,6 +1158,7 @@ define i32 @test_f64_ogt_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1147,6 +1178,7 @@ define i32 @test_f64_ogt_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -1199,6 +1231,7 @@ define i32 @test_f64_oge_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1218,6 +1251,7 @@ define i32 @test_f64_oge_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -1270,6 +1304,7 @@ define i32 @test_f64_olt_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1289,6 +1324,7 @@ define i32 @test_f64_olt_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -1341,6 +1377,7 @@ define i32 @test_f64_ole_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1360,6 +1397,7 @@ define i32 @test_f64_ole_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -1412,6 +1450,7 @@ define i32 @test_f64_one_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1431,6 +1470,7 @@ define i32 @test_f64_one_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -1483,6 +1523,7 @@ define i32 @test_f64_ord_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1502,6 +1543,7 @@ define i32 @test_f64_ord_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnpl %eax, %ecx
@@ -1554,6 +1596,7 @@ define i32 @test_f64_ueq_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1573,6 +1616,7 @@ define i32 @test_f64_ueq_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovel %eax, %ecx
@@ -1625,6 +1669,7 @@ define i32 @test_f64_ugt_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1644,6 +1689,7 @@ define i32 @test_f64_ugt_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -1696,6 +1742,7 @@ define i32 @test_f64_uge_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1715,6 +1762,7 @@ define i32 @test_f64_uge_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -1767,6 +1815,7 @@ define i32 @test_f64_ult_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1786,6 +1835,7 @@ define i32 @test_f64_ult_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -1838,6 +1888,7 @@ define i32 @test_f64_ule_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1857,6 +1908,7 @@ define i32 @test_f64_ule_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -1913,6 +1965,7 @@ define i32 @test_f64_une_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -1932,6 +1985,7 @@ define i32 @test_f64_une_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -1985,6 +2039,7 @@ define i32 @test_f64_uno_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fucompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2004,6 +2059,7 @@ define i32 @test_f64_uno_q(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fucompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovpl %eax, %ecx
@@ -2060,6 +2116,7 @@ define i32 @test_f32_oeq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2079,6 +2136,7 @@ define i32 @test_f32_oeq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -2132,6 +2190,7 @@ define i32 @test_f32_ogt_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2151,6 +2210,7 @@ define i32 @test_f32_ogt_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -2203,6 +2263,7 @@ define i32 @test_f32_oge_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2222,6 +2283,7 @@ define i32 @test_f32_oge_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -2274,6 +2336,7 @@ define i32 @test_f32_olt_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2293,6 +2356,7 @@ define i32 @test_f32_olt_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -2345,6 +2409,7 @@ define i32 @test_f32_ole_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2364,6 +2429,7 @@ define i32 @test_f32_ole_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -2416,6 +2482,7 @@ define i32 @test_f32_one_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2435,6 +2502,7 @@ define i32 @test_f32_one_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -2487,6 +2555,7 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2506,6 +2575,7 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnpl %eax, %ecx
@@ -2558,6 +2628,7 @@ define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2577,6 +2648,7 @@ define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovel %eax, %ecx
@@ -2629,6 +2701,7 @@ define i32 @test_f32_ugt_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2648,6 +2721,7 @@ define i32 @test_f32_ugt_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -2700,6 +2774,7 @@ define i32 @test_f32_uge_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2719,6 +2794,7 @@ define i32 @test_f32_uge_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -2771,6 +2847,7 @@ define i32 @test_f32_ult_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2790,6 +2867,7 @@ define i32 @test_f32_ult_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -2842,6 +2920,7 @@ define i32 @test_f32_ule_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2861,6 +2940,7 @@ define i32 @test_f32_ule_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -2917,6 +2997,7 @@ define i32 @test_f32_une_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -2936,6 +3017,7 @@ define i32 @test_f32_une_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -2989,6 +3071,7 @@ define i32 @test_f32_uno_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3008,6 +3091,7 @@ define i32 @test_f32_uno_s(i32 %a, i32 %b, float %f1, float %f2) #0 {
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovpl %eax, %ecx
@@ -3064,6 +3148,7 @@ define i32 @test_f64_oeq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3083,6 +3168,7 @@ define i32 @test_f64_oeq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -3136,6 +3222,7 @@ define i32 @test_f64_ogt_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3155,6 +3242,7 @@ define i32 @test_f64_ogt_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -3207,6 +3295,7 @@ define i32 @test_f64_oge_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3226,6 +3315,7 @@ define i32 @test_f64_oge_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -3278,6 +3368,7 @@ define i32 @test_f64_olt_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3297,6 +3388,7 @@ define i32 @test_f64_olt_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmoval %eax, %ecx
@@ -3349,6 +3441,7 @@ define i32 @test_f64_ole_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3368,6 +3461,7 @@ define i32 @test_f64_ole_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovael %eax, %ecx
@@ -3420,6 +3514,7 @@ define i32 @test_f64_one_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3439,6 +3534,7 @@ define i32 @test_f64_one_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -3491,6 +3587,7 @@ define i32 @test_f64_ord_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3510,6 +3607,7 @@ define i32 @test_f64_ord_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnpl %eax, %ecx
@@ -3562,6 +3660,7 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3581,6 +3680,7 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovel %eax, %ecx
@@ -3633,6 +3733,7 @@ define i32 @test_f64_ugt_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3652,6 +3753,7 @@ define i32 @test_f64_ugt_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -3704,6 +3806,7 @@ define i32 @test_f64_uge_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3723,6 +3826,7 @@ define i32 @test_f64_uge_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -3775,6 +3879,7 @@ define i32 @test_f64_ult_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3794,6 +3899,7 @@ define i32 @test_f64_ult_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbl %eax, %ecx
@@ -3846,6 +3952,7 @@ define i32 @test_f64_ule_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3865,6 +3972,7 @@ define i32 @test_f64_ule_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovbel %eax, %ecx
@@ -3921,6 +4029,7 @@ define i32 @test_f64_une_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -3940,6 +4049,7 @@ define i32 @test_f64_une_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovnel %eax, %ecx
@@ -3993,6 +4103,7 @@ define i32 @test_f64_uno_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fcompp
+; X87-NEXT: wait
; X87-NEXT: fnstsw %ax
; X87-NEXT: # kill: def $ah killed $ah killed $ax
; X87-NEXT: sahf
@@ -4012,6 +4123,7 @@ define i32 @test_f64_uno_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp)
; X87-CMOV-NEXT: fcompi %st(1), %st
; X87-CMOV-NEXT: fstp %st(0)
+; X87-CMOV-NEXT: wait
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X87-CMOV-NEXT: cmovpl %eax, %ecx
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
index c50d092aba10..c3576addfcdd 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
@@ -59,6 +59,7 @@ define i1 @fptosi_f32toi1(float %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -105,6 +106,7 @@ define i8 @fptosi_f32toi8(float %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -151,6 +153,7 @@ define i16 @fptosi_f32toi16(float %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -193,6 +196,7 @@ define i32 @fptosi_f32toi32(float %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw (%esp)
; CHECK-NEXT: movzwl (%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -222,6 +226,7 @@ define i64 @fptosi_f32toi64(float %x) #0 {
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -254,6 +259,7 @@ define i64 @fptosi_f32toi64(float %x) #0 {
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: fisttpll (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl (%esp), %eax
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX-X86-NEXT: movl %ebp, %esp
@@ -276,6 +282,7 @@ define i64 @fptosi_f32toi64(float %x) #0 {
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: flds 8(%ebp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -324,6 +331,7 @@ define i1 @fptoui_f32toi1(float %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -370,6 +378,7 @@ define i8 @fptoui_f32toi8(float %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -416,6 +425,7 @@ define i16 @fptoui_f32toi16(float %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw (%esp)
; CHECK-NEXT: movzwl (%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -471,6 +481,7 @@ define i32 @fptoui_f32toi32(float %x) #0 {
; AVX1-X86-NEXT: vmovss %xmm0, (%esp)
; AVX1-X86-NEXT: flds (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
+; AVX1-X86-NEXT: wait
; AVX1-X86-NEXT: movl (%esp), %eax
; AVX1-X86-NEXT: movl %ebp, %esp
; AVX1-X86-NEXT: popl %ebp
@@ -503,6 +514,7 @@ define i32 @fptoui_f32toi32(float %x) #0 {
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: flds 8(%ebp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -542,6 +554,7 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: setbe %al
; SSE-X86-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -596,6 +609,7 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX1-X86-NEXT: vmovss %xmm0, (%esp)
; AVX1-X86-NEXT: flds (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
+; AVX1-X86-NEXT: wait
; AVX1-X86-NEXT: setbe %al
; AVX1-X86-NEXT: movzbl %al, %edx
; AVX1-X86-NEXT: shll $31, %edx
@@ -644,6 +658,7 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX512-X86-NEXT: vmovss %xmm0, (%esp)
; AVX512-X86-NEXT: flds (%esp)
; AVX512-X86-NEXT: fisttpll (%esp)
+; AVX512-X86-NEXT: wait
; AVX512-X86-NEXT: setbe %dl
; AVX512-X86-NEXT: shll $31, %edx
; AVX512-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -670,6 +685,7 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; CHECK-NEXT: flds 8(%ebp)
; CHECK-NEXT: flds {{\.LCPI.*}}
; CHECK-NEXT: fcom %st(1)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
@@ -684,6 +700,7 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fsubrp %st, %st(1)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -734,6 +751,7 @@ define i8 @fptosi_f64toi8(double %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -780,6 +798,7 @@ define i16 @fptosi_f64toi16(double %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -822,6 +841,7 @@ define i32 @fptosi_f64toi32(double %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw (%esp)
; CHECK-NEXT: movzwl (%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -851,6 +871,7 @@ define i64 @fptosi_f64toi64(double %x) #0 {
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -883,6 +904,7 @@ define i64 @fptosi_f64toi64(double %x) #0 {
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: fisttpll (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl (%esp), %eax
; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX-X86-NEXT: movl %ebp, %esp
@@ -905,6 +927,7 @@ define i64 @fptosi_f64toi64(double %x) #0 {
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: fldl 8(%ebp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -953,6 +976,7 @@ define i1 @fptoui_f64toi1(double %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -999,6 +1023,7 @@ define i8 @fptoui_f64toi8(double %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1045,6 +1070,7 @@ define i16 @fptoui_f64toi16(double %x) #0 {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw (%esp)
; CHECK-NEXT: movzwl (%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1100,6 +1126,7 @@ define i32 @fptoui_f64toi32(double %x) #0 {
; AVX1-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX1-X86-NEXT: fldl (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
+; AVX1-X86-NEXT: wait
; AVX1-X86-NEXT: movl (%esp), %eax
; AVX1-X86-NEXT: movl %ebp, %esp
; AVX1-X86-NEXT: popl %ebp
@@ -1132,6 +1159,7 @@ define i32 @fptoui_f64toi32(double %x) #0 {
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: fldl 8(%ebp)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1171,6 +1199,7 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; SSE-X86-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: setbe %al
; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -1225,6 +1254,7 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX1-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX1-X86-NEXT: fldl (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
+; AVX1-X86-NEXT: wait
; AVX1-X86-NEXT: setbe %al
; AVX1-X86-NEXT: movzbl %al, %edx
; AVX1-X86-NEXT: shll $31, %edx
@@ -1273,6 +1303,7 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX512-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX512-X86-NEXT: fldl (%esp)
; AVX512-X86-NEXT: fisttpll (%esp)
+; AVX512-X86-NEXT: wait
; AVX512-X86-NEXT: setbe %dl
; AVX512-X86-NEXT: shll $31, %edx
; AVX512-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -1299,6 +1330,7 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; CHECK-NEXT: fldl 8(%ebp)
; CHECK-NEXT: flds {{\.LCPI.*}}
; CHECK-NEXT: fcom %st(1)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
@@ -1313,6 +1345,7 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; CHECK-NEXT: .LBB18_2:
; CHECK-NEXT: fstp %st(1)
; CHECK-NEXT: fsubrp %st, %st(1)
+; CHECK-NEXT: wait
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: orl $3072, %ecx # imm = 0xC00
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
index 887185af8121..0a50f3df2ac2 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
@@ -41,6 +41,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -64,6 +65,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -86,6 +88,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
; X87-NEXT: movsbl %al, %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -104,6 +107,7 @@ define float @sitofp_i8tof32(i8 %x) #0 {
; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -122,6 +126,7 @@ define float @sitofp_i8tof32(i8 %x) #0 {
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -139,6 +144,7 @@ define float @sitofp_i8tof32(i8 %x) #0 {
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -157,6 +163,7 @@ define float @sitofp_i16tof32(i16 %x) #0 {
; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -175,6 +182,7 @@ define float @sitofp_i16tof32(i16 %x) #0 {
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -192,6 +200,7 @@ define float @sitofp_i16tof32(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -209,6 +218,7 @@ define float @sitofp_i32tof32(i32 %x) #0 {
; SSE-X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -225,6 +235,7 @@ define float @sitofp_i32tof32(i32 %x) #0 {
; AVX-X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -241,6 +252,7 @@ define float @sitofp_i32tof32(i32 %x) #0 {
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -258,6 +270,7 @@ define float @sitofp_i64tof32(i64 %x) #0 {
; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fstps (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -274,6 +287,7 @@ define float @sitofp_i64tof32(i64 %x) #0 {
; AVX-X86-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-X86-NEXT: fstps (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -286,6 +300,7 @@ define float @sitofp_i64tof32(i64 %x) #0 {
; X87-LABEL: sitofp_i64tof32:
; X87: # %bb.0:
; X87-NEXT: fildll {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x,
metadata !"round.dynamic",
@@ -304,6 +319,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -324,6 +340,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -343,6 +360,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
; X87-NEXT: movzbl %al, %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -361,6 +379,7 @@ define float @uitofp_i8tof32(i8 %x) #0 {
; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -379,6 +398,7 @@ define float @uitofp_i8tof32(i8 %x) #0 {
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -396,6 +416,7 @@ define float @uitofp_i8tof32(i8 %x) #0 {
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -414,6 +435,7 @@ define float @uitofp_i16tof32(i16 %x) #0 {
; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -432,6 +454,7 @@ define float @uitofp_i16tof32(i16 %x) #0 {
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -449,6 +472,7 @@ define float @uitofp_i16tof32(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -471,6 +495,7 @@ define float @uitofp_i32tof32(i32 %x) #0 {
; SSE-X86-NEXT: cvtsd2ss %xmm1, %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
@@ -492,6 +517,7 @@ define float @uitofp_i32tof32(i32 %x) #0 {
; AVX1-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX1-X86-NEXT: vmovss %xmm0, (%esp)
; AVX1-X86-NEXT: flds (%esp)
+; AVX1-X86-NEXT: wait
; AVX1-X86-NEXT: popl %eax
; AVX1-X86-NEXT: .cfi_def_cfa_offset 4
; AVX1-X86-NEXT: retl
@@ -509,6 +535,7 @@ define float @uitofp_i32tof32(i32 %x) #0 {
; AVX512-X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512-X86-NEXT: vmovss %xmm0, (%esp)
; AVX512-X86-NEXT: flds (%esp)
+; AVX512-X86-NEXT: wait
; AVX512-X86-NEXT: popl %eax
; AVX512-X86-NEXT: .cfi_def_cfa_offset 4
; AVX512-X86-NEXT: retl
@@ -531,6 +558,7 @@ define float @uitofp_i32tof32(i32 %x) #0 {
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
+; X87-NEXT: wait
; X87-NEXT: movl %ebp, %esp
; X87-NEXT: popl %ebp
; X87-NEXT: .cfi_def_cfa %esp, 4
@@ -558,9 +586,11 @@ define float @uitofp_i64tof32(i64 %x) #0 {
; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -598,9 +628,11 @@ define float @uitofp_i64tof32(i64 %x) #0 {
; AVX-X86-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; AVX-X86-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -645,6 +677,7 @@ define float @uitofp_i64tof32(i64 %x) #0 {
; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: movl %ebp, %esp
; X87-NEXT: popl %ebp
; X87-NEXT: .cfi_def_cfa %esp, 4
@@ -669,6 +702,7 @@ define double @sitofp_i8tof64(i8 %x) #0 {
; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -693,6 +727,7 @@ define double @sitofp_i8tof64(i8 %x) #0 {
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -711,6 +746,7 @@ define double @sitofp_i8tof64(i8 %x) #0 {
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -734,6 +770,7 @@ define double @sitofp_i16tof64(i16 %x) #0 {
; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -758,6 +795,7 @@ define double @sitofp_i16tof64(i16 %x) #0 {
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -776,6 +814,7 @@ define double @sitofp_i16tof64(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -798,6 +837,7 @@ define double @sitofp_i32tof64(i32 %x) #0 {
; SSE-X86-NEXT: cvtsi2sdl 8(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -820,6 +860,7 @@ define double @sitofp_i32tof64(i32 %x) #0 {
; AVX-X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -837,6 +878,7 @@ define double @sitofp_i32tof64(i32 %x) #0 {
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -859,6 +901,7 @@ define double @sitofp_i64tof64(i64 %x) #0 {
; SSE-X86-NEXT: fildll 8(%ebp)
; SSE-X86-NEXT: fstpl (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -881,6 +924,7 @@ define double @sitofp_i64tof64(i64 %x) #0 {
; AVX-X86-NEXT: fildll 8(%ebp)
; AVX-X86-NEXT: fstpl (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -894,6 +938,7 @@ define double @sitofp_i64tof64(i64 %x) #0 {
; X87-LABEL: sitofp_i64tof64:
; X87: # %bb.0:
; X87-NEXT: fildll {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x,
metadata !"round.dynamic",
@@ -917,6 +962,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -943,6 +989,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -963,6 +1010,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
; X87-NEXT: movzbl %al, %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -986,6 +1034,7 @@ define double @uitofp_i8tof64(i8 %x) #0 {
; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1010,6 +1059,7 @@ define double @uitofp_i8tof64(i8 %x) #0 {
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1028,6 +1078,7 @@ define double @uitofp_i8tof64(i8 %x) #0 {
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -1051,6 +1102,7 @@ define double @uitofp_i16tof64(i16 %x) #0 {
; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1075,6 +1127,7 @@ define double @uitofp_i16tof64(i16 %x) #0 {
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1093,6 +1146,7 @@ define double @uitofp_i16tof64(i16 %x) #0 {
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
@@ -1118,6 +1172,7 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; SSE-X86-NEXT: subsd %xmm0, %xmm1
; SSE-X86-NEXT: movsd %xmm1, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1144,6 +1199,7 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX1-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX1-X86-NEXT: fldl (%esp)
+; AVX1-X86-NEXT: wait
; AVX1-X86-NEXT: movl %ebp, %esp
; AVX1-X86-NEXT: popl %ebp
; AVX1-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1167,6 +1223,7 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; AVX512-X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0
; AVX512-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX512-X86-NEXT: fldl (%esp)
+; AVX512-X86-NEXT: wait
; AVX512-X86-NEXT: movl %ebp, %esp
; AVX512-X86-NEXT: popl %ebp
; AVX512-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1190,6 +1247,7 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
+; X87-NEXT: wait
; X87-NEXT: movl %ebp, %esp
; X87-NEXT: popl %ebp
; X87-NEXT: .cfi_def_cfa %esp, 4
@@ -1218,6 +1276,7 @@ define double @uitofp_i64tof64(i64 %x) #0 {
; SSE-X86-NEXT: addpd %xmm0, %xmm1
; SSE-X86-NEXT: movlpd %xmm1, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1249,6 +1308,7 @@ define double @uitofp_i64tof64(i64 %x) #0 {
; AVX-X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX-X86-NEXT: vmovlpd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -1286,6 +1346,7 @@ define double @uitofp_i64tof64(i64 %x) #0 {
; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: movl %ebp, %esp
; X87-NEXT: popl %ebp
; X87-NEXT: .cfi_def_cfa %esp, 4
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
index 966be02124d8..26137bd76a9f 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
@@ -26,6 +26,7 @@ define float @fceil32(float %f) #0 {
; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
@@ -43,6 +44,7 @@ define float @fceil32(float %f) #0 {
; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -70,6 +72,7 @@ define double @fceilf64(double %f) #0 {
; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -93,6 +96,7 @@ define double @fceilf64(double %f) #0 {
; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -116,6 +120,7 @@ define float @ffloor32(float %f) #0 {
; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
@@ -133,6 +138,7 @@ define float @ffloor32(float %f) #0 {
; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -160,6 +166,7 @@ define double @ffloorf64(double %f) #0 {
; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -183,6 +190,7 @@ define double @ffloorf64(double %f) #0 {
; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -206,6 +214,7 @@ define float @ftrunc32(float %f) #0 {
; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
@@ -223,6 +232,7 @@ define float @ftrunc32(float %f) #0 {
; AVX-X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -250,6 +260,7 @@ define double @ftruncf64(double %f) #0 {
; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -273,6 +284,7 @@ define double @ftruncf64(double %f) #0 {
; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -296,6 +308,7 @@ define float @frint32(float %f) #0 {
; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
@@ -313,6 +326,7 @@ define float @frint32(float %f) #0 {
; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -341,6 +355,7 @@ define double @frintf64(double %f) #0 {
; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -364,6 +379,7 @@ define double @frintf64(double %f) #0 {
; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -388,6 +404,7 @@ define float @fnearbyint32(float %f) #0 {
; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movss %xmm0, (%esp)
; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: popl %eax
; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
; SSE41-X86-NEXT: retl
@@ -405,6 +422,7 @@ define float @fnearbyint32(float %f) #0 {
; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 4
; AVX-X86-NEXT: retl
@@ -433,6 +451,7 @@ define double @fnearbyintf64(double %f) #0 {
; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0
; SSE41-X86-NEXT: movsd %xmm0, (%esp)
; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: wait
; SSE41-X86-NEXT: movl %ebp, %esp
; SSE41-X86-NEXT: popl %ebp
; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
@@ -456,6 +475,7 @@ define double @fnearbyintf64(double %f) #0 {
; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
index c864acda3202..fbcde4aa4cd2 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
@@ -33,6 +33,7 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-NEXT: addsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
@@ -52,6 +53,7 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp {
; AVX-X86-NEXT: vaddsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
@@ -65,6 +67,7 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: faddl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b,
metadata !"round.dynamic",
@@ -80,6 +83,7 @@ define float @fadd_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
@@ -95,6 +99,7 @@ define float @fadd_f32(float %a, float %b) nounwind strictfp {
; AVX-X86-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
@@ -107,6 +112,7 @@ define float @fadd_f32(float %a, float %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fadds {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b,
metadata !"round.dynamic",
@@ -125,6 +131,7 @@ define double @fsub_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-NEXT: subsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
@@ -144,6 +151,7 @@ define double @fsub_f64(double %a, double %b) nounwind strictfp {
; AVX-X86-NEXT: vsubsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
@@ -157,6 +165,7 @@ define double @fsub_f64(double %a, double %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fsubl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b,
metadata !"round.dynamic",
@@ -172,6 +181,7 @@ define float @fsub_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-NEXT: subss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
@@ -187,6 +197,7 @@ define float @fsub_f32(float %a, float %b) nounwind strictfp {
; AVX-X86-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
@@ -199,6 +210,7 @@ define float @fsub_f32(float %a, float %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fsubs {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call float @llvm.experimental.constrained.fsub.f32(float %a, float %b,
metadata !"round.dynamic",
@@ -217,6 +229,7 @@ define double @fmul_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-NEXT: mulsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
@@ -236,6 +249,7 @@ define double @fmul_f64(double %a, double %b) nounwind strictfp {
; AVX-X86-NEXT: vmulsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
@@ -249,6 +263,7 @@ define double @fmul_f64(double %a, double %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fmull {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b,
metadata !"round.dynamic",
@@ -264,6 +279,7 @@ define float @fmul_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-NEXT: mulss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
@@ -279,6 +295,7 @@ define float @fmul_f32(float %a, float %b) nounwind strictfp {
; AVX-X86-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
@@ -291,6 +308,7 @@ define float @fmul_f32(float %a, float %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fmuls {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call float @llvm.experimental.constrained.fmul.f32(float %a, float %b,
metadata !"round.dynamic",
@@ -309,6 +327,7 @@ define double @fdiv_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-NEXT: divsd 16(%ebp), %xmm0
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: fldl (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: retl
@@ -328,6 +347,7 @@ define double @fdiv_f64(double %a, double %b) nounwind strictfp {
; AVX-X86-NEXT: vdivsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
@@ -341,6 +361,7 @@ define double @fdiv_f64(double %a, double %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fdivl {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b,
metadata !"round.dynamic",
@@ -356,6 +377,7 @@ define float @fdiv_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-NEXT: divss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: flds (%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: popl %eax
; SSE-X86-NEXT: retl
;
@@ -371,6 +393,7 @@ define float @fdiv_f32(float %a, float %b) nounwind strictfp {
; AVX-X86-NEXT: vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
@@ -383,6 +406,7 @@ define float @fdiv_f32(float %a, float %b) nounwind strictfp {
; X87: # %bb.0:
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fdivs {{[0-9]+}}(%esp)
+; X87-NEXT: wait
; X87-NEXT: retl
%ret = call float @llvm.experimental.constrained.fdiv.f32(float %a, float %b,
metadata !"round.dynamic",
@@ -429,6 +453,7 @@ define void @fpext_f32_to_f64(float* %val, double* %ret) nounwind strictfp {
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: flds (%ecx)
; X87-NEXT: fstpl (%eax)
+; X87-NEXT: wait
; X87-NEXT: retl
%1 = load float, float* %val, align 4
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1,
@@ -479,6 +504,7 @@ define void @fptrunc_double_to_f32(double* %val, float *%ret) nounwind strictfp
; X87-NEXT: fstps (%esp)
; X87-NEXT: flds (%esp)
; X87-NEXT: fstps (%eax)
+; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: retl
%1 = load double, double* %val, align 8
@@ -526,6 +552,7 @@ define void @fsqrt_f64(double* %a) nounwind strictfp {
; X87-NEXT: fldl (%eax)
; X87-NEXT: fsqrt
; X87-NEXT: fstpl (%eax)
+; X87-NEXT: wait
; X87-NEXT: retl
%1 = load double, double* %a, align 8
%res = call double @llvm.experimental.constrained.sqrt.f64(double %1,
@@ -572,6 +599,7 @@ define void @fsqrt_f32(float* %a) nounwind strictfp {
; X87-NEXT: flds (%eax)
; X87-NEXT: fsqrt
; X87-NEXT: fstps (%eax)
+; X87-NEXT: wait
; X87-NEXT: retl
%1 = load float, float* %a, align 4
%res = call float @llvm.experimental.constrained.sqrt.f32(float %1,
@@ -613,6 +641,7 @@ define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
; AVX-X86-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT: vmovsd %xmm1, (%esp)
; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
@@ -631,6 +660,7 @@ define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll fma
; X87-NEXT: addl $24, %esp
; X87-NEXT: retl
@@ -669,6 +699,7 @@ define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
; AVX-X86-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT: vmovss %xmm1, (%esp)
; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: wait
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
@@ -686,6 +717,7 @@ define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
+; X87-NEXT: wait
; X87-NEXT: calll fmaf
; X87-NEXT: addl $12, %esp
; X87-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
index 048656f48b62..ce5b09f8d411 100644
--- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -39,6 +39,7 @@ define void @TestFPExtF32_F128() nounwind strictfp {
; X86-NEXT: subl $24, %esp
; X86-NEXT: flds vf32
; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __extendsftf2
@@ -86,6 +87,7 @@ define void @TestFPExtF64_F128() nounwind strictfp {
; X86-NEXT: subl $40, %esp
; X86-NEXT: fldl vf64
; X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __extenddftf2
@@ -114,6 +116,7 @@ define void @TestFPExtF80_F128() nounwind strictfp {
; X64-SSE-NEXT: subq $24, %rsp
; X64-SSE-NEXT: fldt {{.*}}(%rip)
; X64-SSE-NEXT: fstpt (%rsp)
+; X64-SSE-NEXT: wait
; X64-SSE-NEXT: callq __extendxftf2
; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT: addq $24, %rsp
@@ -124,6 +127,7 @@ define void @TestFPExtF80_F128() nounwind strictfp {
; X64-AVX-NEXT: subq $24, %rsp
; X64-AVX-NEXT: fldt {{.*}}(%rip)
; X64-AVX-NEXT: fstpt (%rsp)
+; X64-AVX-NEXT: wait
; X64-AVX-NEXT: callq __extendxftf2
; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT: addq $24, %rsp
@@ -135,6 +139,7 @@ define void @TestFPExtF80_F128() nounwind strictfp {
; X86-NEXT: subl $40, %esp
; X86-NEXT: fldt vf80
; X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __extendxftf2
@@ -186,6 +191,7 @@ define void @TestFPTruncF128_F32() nounwind strictfp {
; X86-NEXT: calll __trunctfsf2
; X86-NEXT: addl $16, %esp
; X86-NEXT: fstps vf32
+; X86-NEXT: wait
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
entry:
@@ -224,6 +230,7 @@ define void @TestFPTruncF128_F64() nounwind strictfp {
; X86-NEXT: calll __trunctfdf2
; X86-NEXT: addl $16, %esp
; X86-NEXT: fstpl vf64
+; X86-NEXT: wait
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
entry:
@@ -240,6 +247,7 @@ define void @TestFPTruncF128_F80() nounwind strictfp {
; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: callq __trunctfxf2
; X64-SSE-NEXT: fstpt {{.*}}(%rip)
+; X64-SSE-NEXT: wait
; X64-SSE-NEXT: popq %rax
; X64-SSE-NEXT: retq
;
@@ -249,6 +257,7 @@ define void @TestFPTruncF128_F80() nounwind strictfp {
; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
; X64-AVX-NEXT: callq __trunctfxf2
; X64-AVX-NEXT: fstpt {{.*}}(%rip)
+; X64-AVX-NEXT: wait
; X64-AVX-NEXT: popq %rax
; X64-AVX-NEXT: retq
;
@@ -262,6 +271,7 @@ define void @TestFPTruncF128_F80() nounwind strictfp {
; X86-NEXT: calll __trunctfxf2
; X86-NEXT: addl $16, %esp
; X86-NEXT: fstpt vf80
+; X86-NEXT: wait
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
entry:
diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll
index 7e5896f04486..1e38b6744f3c 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll
@@ -8,6 +8,7 @@ define i32 @test_oeq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -28,6 +29,7 @@ define i32 @test_oeq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnel %esi, %eax
; X87-64-NEXT: cmovpl %esi, %eax
; X87-64-NEXT: retq
@@ -44,6 +46,7 @@ define i32 @test_ogt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -64,6 +67,7 @@ define i32 @test_ogt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -79,6 +83,7 @@ define i32 @test_oge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -99,6 +104,7 @@ define i32 @test_oge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -114,6 +120,7 @@ define i32 @test_olt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -134,6 +141,7 @@ define i32 @test_olt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -149,6 +157,7 @@ define i32 @test_ole_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -169,6 +178,7 @@ define i32 @test_ole_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -184,6 +194,7 @@ define i32 @test_one_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -204,6 +215,7 @@ define i32 @test_one_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -219,6 +231,7 @@ define i32 @test_ord_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -239,6 +252,7 @@ define i32 @test_ord_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovpl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -254,6 +268,7 @@ define i32 @test_ueq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -274,6 +289,7 @@ define i32 @test_ueq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -289,6 +305,7 @@ define i32 @test_ugt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -309,6 +326,7 @@ define i32 @test_ugt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovael %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -324,6 +342,7 @@ define i32 @test_uge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -344,6 +363,7 @@ define i32 @test_uge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmoval %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -359,6 +379,7 @@ define i32 @test_ult_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -379,6 +400,7 @@ define i32 @test_ult_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovael %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -394,6 +416,7 @@ define i32 @test_ule_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -414,6 +437,7 @@ define i32 @test_ule_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmoval %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -429,6 +453,7 @@ define i32 @test_une_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -449,6 +474,7 @@ define i32 @test_une_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnel %edi, %eax
; X87-64-NEXT: cmovpl %edi, %eax
; X87-64-NEXT: retq
@@ -465,6 +491,7 @@ define i32 @test_uno_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fucompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -485,6 +512,7 @@ define i32 @test_uno_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fucompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnpl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80(
@@ -500,6 +528,7 @@ define i32 @test_oeq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -520,6 +549,7 @@ define i32 @test_oeq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnel %esi, %eax
; X87-64-NEXT: cmovpl %esi, %eax
; X87-64-NEXT: retq
@@ -536,6 +566,7 @@ define i32 @test_ogt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -556,6 +587,7 @@ define i32 @test_ogt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -571,6 +603,7 @@ define i32 @test_oge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -591,6 +624,7 @@ define i32 @test_oge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -606,6 +640,7 @@ define i32 @test_olt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -626,6 +661,7 @@ define i32 @test_olt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -641,6 +677,7 @@ define i32 @test_ole_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -661,6 +698,7 @@ define i32 @test_ole_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovbl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -676,6 +714,7 @@ define i32 @test_one_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -696,6 +735,7 @@ define i32 @test_one_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -711,6 +751,7 @@ define i32 @test_ord_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -731,6 +772,7 @@ define i32 @test_ord_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovpl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -746,6 +788,7 @@ define i32 @test_ueq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -766,6 +809,7 @@ define i32 @test_ueq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnel %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -781,6 +825,7 @@ define i32 @test_ugt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -801,6 +846,7 @@ define i32 @test_ugt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovael %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -816,6 +862,7 @@ define i32 @test_uge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -836,6 +883,7 @@ define i32 @test_uge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmoval %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -851,6 +899,7 @@ define i32 @test_ult_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -871,6 +920,7 @@ define i32 @test_ult_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovael %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -886,6 +936,7 @@ define i32 @test_ule_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -906,6 +957,7 @@ define i32 @test_ule_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmoval %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
@@ -921,6 +973,7 @@ define i32 @test_une_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -941,6 +994,7 @@ define i32 @test_une_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnel %edi, %eax
; X87-64-NEXT: cmovpl %edi, %eax
; X87-64-NEXT: retq
@@ -957,6 +1011,7 @@ define i32 @test_uno_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fldt {{[0-9]+}}(%esp)
; X87-32-NEXT: fcompp
+; X87-32-NEXT: wait
; X87-32-NEXT: fnstsw %ax
; X87-32-NEXT: # kill: def $ah killed $ah killed $ax
; X87-32-NEXT: sahf
@@ -977,6 +1032,7 @@ define i32 @test_uno_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 {
; X87-64-NEXT: fldt {{[0-9]+}}(%rsp)
; X87-64-NEXT: fcompi %st(1), %st
; X87-64-NEXT: fstp %st(0)
+; X87-64-NEXT: wait
; X87-64-NEXT: cmovnpl %esi, %eax
; X87-64-NEXT: retq
%cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80(
diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
index 5f03917f56ae..cf4a51fd6920 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
@@ -38,6 +38,7 @@ define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: faddp %st, %st(1)
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: fadd_fp80:
@@ -45,6 +46,7 @@ define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: faddp %st, %st(1)
+; X64-NEXT: wait
; X64-NEXT: retq
%ret = call x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80 %a, x86_fp80 %b,
metadata !"round.dynamic",
@@ -58,6 +60,7 @@ define x86_fp80 @fsub_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fsubp %st, %st(1)
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: fsub_fp80:
@@ -65,6 +68,7 @@ define x86_fp80 @fsub_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fsubp %st, %st(1)
+; X64-NEXT: wait
; X64-NEXT: retq
%ret = call x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80 %a, x86_fp80 %b,
metadata !"round.dynamic",
@@ -78,6 +82,7 @@ define x86_fp80 @fmul_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fmulp %st, %st(1)
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: fmul_fp80:
@@ -85,6 +90,7 @@ define x86_fp80 @fmul_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fmulp %st, %st(1)
+; X64-NEXT: wait
; X64-NEXT: retq
%ret = call x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80 %a, x86_fp80 %b,
metadata !"round.dynamic",
@@ -98,6 +104,7 @@ define x86_fp80 @fdiv_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fdivp %st, %st(1)
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: fdiv_fp80:
@@ -105,6 +112,7 @@ define x86_fp80 @fdiv_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fdivp %st, %st(1)
+; X64-NEXT: wait
; X64-NEXT: retq
%ret = call x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80 %a, x86_fp80 %b,
metadata !"round.dynamic",
@@ -116,12 +124,14 @@ define x86_fp80 @fpext_f32_to_fp80(float %a) nounwind strictfp {
; X86-LABEL: fpext_f32_to_fp80:
; X86: # %bb.0:
; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: fpext_f32_to_fp80:
; X64: # %bb.0:
; X64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: flds -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %a,
metadata !"fpexcept.strict") #0
@@ -132,12 +142,14 @@ define x86_fp80 @fpext_f64_to_fp80(double %a) nounwind strictfp {
; X86-LABEL: fpext_f64_to_fp80:
; X86: # %bb.0:
; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: fpext_f64_to_fp80:
; X64: # %bb.0:
; X64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldl -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %a,
metadata !"fpexcept.strict") #0
@@ -151,6 +163,7 @@ define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp {
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fstps (%esp)
; X86-NEXT: flds (%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
@@ -158,6 +171,7 @@ define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp {
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstps -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: retq
%ret = call float @llvm.experimental.constrained.fptrunc.f32.x86_fp80(x86_fp80 %a,
@@ -176,6 +190,7 @@ define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp {
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: fstpl (%esp)
; X86-NEXT: fldl (%esp)
+; X86-NEXT: wait
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
@@ -184,6 +199,7 @@ define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp {
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: retq
%ret = call double @llvm.experimental.constrained.fptrunc.f64.x86_fp80(x86_fp80 %a,
@@ -197,12 +213,14 @@ define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp {
; X86: # %bb.0:
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fsqrt
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: fsqrt_fp80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fsqrt
+; X64-NEXT: wait
; X64-NEXT: retq
%ret = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %a,
metadata !"round.dynamic",
@@ -216,6 +234,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 {
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -231,6 +250,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_sint1:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -251,6 +271,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 {
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -266,6 +287,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_sint8:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -286,6 +308,7 @@ define i16 @fp80_to_sint16(x86_fp80 %x) #0 {
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -301,6 +324,7 @@ define i16 @fp80_to_sint16(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_sint16:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -321,6 +345,7 @@ define i32 @fp80_to_sint32(x86_fp80 %x) #0 {
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw (%esp)
; X86-NEXT: movzwl (%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -336,6 +361,7 @@ define i32 @fp80_to_sint32(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_sint32:
; X64: # %bb.0: # %entry
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -362,6 +388,7 @@ define i64 @fp80_to_sint64(x86_fp80 %x) #0 {
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: fldt 8(%ebp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -379,6 +406,7 @@ define i64 @fp80_to_sint64(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_sint64:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -399,6 +427,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 {
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -414,6 +443,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_uint1:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -434,6 +464,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 {
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -449,6 +480,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_uint8:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -469,6 +501,7 @@ define i16 @fp80_to_uint16(x86_fp80 %x) #0 {
; X86-NEXT: subl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw (%esp)
; X86-NEXT: movzwl (%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -485,6 +518,7 @@ define i16 @fp80_to_uint16(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_uint16:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -511,6 +545,7 @@ define i32 @fp80_to_uint32(x86_fp80 %x) #0 {
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: fldt 8(%ebp)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
@@ -527,6 +562,7 @@ define i32 @fp80_to_uint32(x86_fp80 %x) #0 {
; X64-LABEL: fp80_to_uint32:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
@@ -554,6 +590,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: flds {{\.LCPI.*}}
; X86-NEXT: fcom %st(1)
+; X86-NEXT: wait
; X86-NEXT: fnstsw %ax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: # kill: def $ah killed $ah killed $ax
@@ -568,6 +605,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
; X86-NEXT: .LBB18_2:
; X86-NEXT: fstp %st(1)
; X86-NEXT: fsubrp %st, %st(1)
+; X86-NEXT: wait
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -588,14 +626,17 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
+; X64-NEXT: wait
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fcomi %st(1), %st
+; X64-NEXT: wait
; X64-NEXT: setbe %al
; X64-NEXT: fldz
; X64-NEXT: fxch %st(1)
; X64-NEXT: fcmovnbe %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: fsubrp %st, %st(1)
+; X64-NEXT: wait
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -622,6 +663,7 @@ define x86_fp80 @sint1_to_fp80(i1 %x) #0 {
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: filds {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -633,6 +675,7 @@ define x86_fp80 @sint1_to_fp80(i1 %x) #0 {
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: filds -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i1(i1 %x,
metadata !"round.dynamic",
@@ -648,6 +691,7 @@ define x86_fp80 @sint8_to_fp80(i8 %x) #0 {
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: filds {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -657,6 +701,7 @@ define x86_fp80 @sint8_to_fp80(i8 %x) #0 {
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: filds -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i8(i8 %x,
metadata !"round.dynamic",
@@ -672,6 +717,7 @@ define x86_fp80 @sint16_to_fp80(i16 %x) #0 {
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: filds {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -680,6 +726,7 @@ define x86_fp80 @sint16_to_fp80(i16 %x) #0 {
; X64: # %bb.0:
; X64-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-NEXT: filds -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i16(i16 %x,
metadata !"round.dynamic",
@@ -695,6 +742,7 @@ define x86_fp80 @sint32_to_fp80(i32 %x) #0 {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fildl (%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -703,6 +751,7 @@ define x86_fp80 @sint32_to_fp80(i32 %x) #0 {
; X64: # %bb.0:
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildl -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i32(i32 %x,
metadata !"round.dynamic",
@@ -714,12 +763,14 @@ define x86_fp80 @sint64_to_fp80(i64 %x) #0 {
; X86-LABEL: sint64_to_fp80:
; X86: # %bb.0:
; X86-NEXT: fildll {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: retl
;
; X64-LABEL: sint64_to_fp80:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i64(i64 %x,
metadata !"round.dynamic",
@@ -737,6 +788,7 @@ define x86_fp80 @uint1_to_fp80(i1 %x) #0 {
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: filds {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -746,6 +798,7 @@ define x86_fp80 @uint1_to_fp80(i1 %x) #0 {
; X64-NEXT: andl $1, %edi
; X64-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-NEXT: filds -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i1(i1 %x,
metadata !"round.dynamic",
@@ -761,6 +814,7 @@ define x86_fp80 @uint8_to_fp80(i8 %x) #0 {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: filds {{[0-9]+}}(%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -770,6 +824,7 @@ define x86_fp80 @uint8_to_fp80(i8 %x) #0 {
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: filds -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i8(i8 %x,
metadata !"round.dynamic",
@@ -785,6 +840,7 @@ define x86_fp80 @uint16_to_fp80(i16 %x) #0 {
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fildl (%esp)
+; X86-NEXT: wait
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
@@ -794,6 +850,7 @@ define x86_fp80 @uint16_to_fp80(i16 %x) #0 {
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildl -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i16(i16 %x,
metadata !"round.dynamic",
@@ -815,6 +872,7 @@ define x86_fp80 @uint32_to_fp80(i32 %x) #0 {
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: fildll (%esp)
+; X86-NEXT: wait
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
@@ -825,6 +883,7 @@ define x86_fp80 @uint32_to_fp80(i32 %x) #0 {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i32(i32 %x,
metadata !"round.dynamic",
@@ -849,6 +908,7 @@ define x86_fp80 @uint64_to_fp80(i64 %x) #0 {
; X86-NEXT: shrl $31, %ecx
; X86-NEXT: fildll (%esp)
; X86-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
+; X86-NEXT: wait
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
@@ -862,6 +922,7 @@ define x86_fp80 @uint64_to_fp80(i64 %x) #0 {
; X64-NEXT: sets %al
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
; X64-NEXT: fadds {{\.LCPI.*}}(,%rax,4)
+; X64-NEXT: wait
; X64-NEXT: retq
%result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i64(i64 %x,
metadata !"round.dynamic",
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
index 5aa0802adf03..98162a1da9a9 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -234,6 +234,7 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
@@ -245,6 +246,7 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
@@ -269,8 +271,10 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-X86-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -359,8 +363,10 @@ define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhps %xmm0, (%esp)
; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: calll fma
; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: wait
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-X86-NEXT: movl %ebp, %esp
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index fb1ec511fcca..205ea7f66720 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -54,6 +54,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: movhps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -62,6 +63,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -103,6 +105,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -137,6 +140,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -171,6 +175,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -230,6 +235,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: movsd %xmm4, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -247,6 +253,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %edx
; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
@@ -329,6 +336,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm3, (%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
@@ -342,6 +350,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
@@ -410,6 +419,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512F-32-NEXT: vmovsd %xmm1, (%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
@@ -422,6 +432,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -466,6 +477,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm1, (%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
@@ -478,6 +490,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -531,6 +544,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -539,6 +553,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -580,6 +595,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -614,6 +630,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512F-32-NEXT: fisttpll (%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -648,6 +665,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -723,6 +741,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -740,6 +759,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %edx
; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
@@ -822,6 +842,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
@@ -835,6 +856,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm0, (%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
@@ -903,6 +925,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
@@ -915,6 +938,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -959,6 +983,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
@@ -971,6 +996,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -1128,6 +1154,7 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, (%esp), %xmm0, %xmm0
; AVX-32-NEXT: movl %ebp, %esp
@@ -1283,6 +1310,7 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: movl %ebp, %esp
@@ -1764,6 +1792,7 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: movhps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1772,6 +1801,7 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -1813,6 +1843,7 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -1896,6 +1927,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: movsd %xmm4, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -1913,6 +1945,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %edx
; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
@@ -1995,6 +2028,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm3, (%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
@@ -2008,6 +2042,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
@@ -2111,6 +2146,7 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -2119,6 +2155,7 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %eax
; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
@@ -2160,6 +2197,7 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -2265,6 +2303,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
@@ -2282,6 +2321,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: fnstcw (%esp)
; SSE-32-NEXT: movzwl (%esp), %edx
; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
@@ -2364,6 +2404,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
@@ -2377,6 +2418,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm0, (%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
index 6b7f083a2c87..38f19e9ed2d5 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
@@ -57,6 +57,7 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -111,6 +112,7 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -165,6 +167,7 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -236,6 +239,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
@@ -252,6 +256,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm4, (%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
@@ -266,6 +271,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %dl
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
@@ -279,6 +285,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -385,6 +392,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: movl $0, %eax
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
@@ -402,6 +410,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-32-NEXT: vmovsd %xmm4, (%esp)
; AVX512F-32-NEXT: fldl (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -415,6 +424,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %dl
; AVX512F-32-NEXT: shll $31, %edx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -427,6 +437,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -489,6 +500,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -506,6 +518,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm4, (%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -519,6 +532,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -531,6 +545,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -606,6 +621,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -659,6 +675,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -712,6 +729,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -783,6 +801,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %al
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
@@ -798,6 +817,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm3, (%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %cl
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
@@ -813,6 +833,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: setae %dl
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
@@ -826,6 +847,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -932,6 +954,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: movl $0, %eax
; AVX512F-32-NEXT: setae %al
; AVX512F-32-NEXT: shll $31, %eax
@@ -948,6 +971,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-32-NEXT: vmovss %xmm2, (%esp)
; AVX512F-32-NEXT: flds (%esp)
; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %cl
; AVX512F-32-NEXT: shll $31, %ecx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -962,6 +986,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: setae %dl
; AVX512F-32-NEXT: shll $31, %edx
; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -974,6 +999,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512F-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -1036,6 +1062,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -1052,6 +1079,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm2, (%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -1066,6 +1094,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -1078,6 +1107,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
index 3cc9edb83e13..06464ea1cb81 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -65,6 +65,7 @@ define <8 x i64> @strict_vector_fptosi_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -160,6 +161,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -176,6 +178,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm4, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -190,6 +193,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -206,6 +210,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm4, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -220,6 +225,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -237,6 +243,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm4, (%esp)
; AVX512VL-32-NEXT: fldl (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -250,6 +257,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
@@ -262,6 +270,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
@@ -370,6 +379,7 @@ define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
@@ -465,6 +475,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -480,6 +491,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -495,6 +507,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -511,6 +524,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -525,6 +539,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: movl $0, %eax
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
@@ -541,6 +556,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm4, (%esp)
; AVX512VL-32-NEXT: flds (%esp)
; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %dl
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
@@ -555,6 +571,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
@@ -567,6 +584,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
index f3c79c2cb397..51d9fe2bb864 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
@@ -141,6 +141,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-NEXT: fstps (%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
@@ -177,6 +178,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-32-NEXT: fstps (%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE41-32-NEXT: wait
; SSE41-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
@@ -213,6 +215,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: fstps (%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT: movl %ebp, %esp
@@ -277,12 +280,14 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; SSE-32-NEXT: fstps (%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: movd %xmm0, %eax
; SSE-32-NEXT: shrl $31, %eax
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; SSE-32-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
@@ -344,12 +349,14 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; SSE41-32-NEXT: fstps (%esp)
+; SSE41-32-NEXT: wait
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-32-NEXT: movd %xmm0, %eax
; SSE41-32-NEXT: shrl $31, %eax
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; SSE41-32-NEXT: fstps {{[0-9]+}}(%esp)
+; SSE41-32-NEXT: wait
; SSE41-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
@@ -410,11 +417,13 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $3, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; AVX-32-NEXT: fstps (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT: movl %ebp, %esp
@@ -1143,6 +1152,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstpl (%esp)
+; SSE-32-NEXT: wait
; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-32-NEXT: movl %ebp, %esp
@@ -1178,6 +1188,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE41-32-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstpl (%esp)
+; SSE41-32-NEXT: wait
; SSE41-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE41-32-NEXT: movl %ebp, %esp
@@ -1213,6 +1224,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT: movl %ebp, %esp
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
index 60f0c3430125..5fbb8aa864c1 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -651,12 +651,14 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -889,6 +891,7 @@ define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
@@ -1008,21 +1011,25 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; AVX-32-NEXT: fstps (%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $3, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $1, %xmm1, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $3, %xmm1, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
index b61c401a6e46..55134b4e0d6d 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
@@ -290,12 +290,14 @@ define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl (%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -303,12 +305,14 @@ define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1]
; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -422,6 +426,7 @@ define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
@@ -434,6 +439,7 @@ define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps (%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
@@ -514,41 +520,49 @@ define <8 x float> @uitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps (%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm0, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm3, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm3, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm2, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm2, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm1, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm1, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
+; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index cd8a06f71d02..7c35ccebefb7 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -92,6 +92,7 @@ define <3 x double> @constrained_vector_fdiv_v3f64() #0 {
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fdiv_v3f64:
@@ -292,6 +293,7 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
; CHECK-NEXT: callq fmod
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -484,6 +486,7 @@ define <3 x double> @constrained_vector_fmul_v3f64() #0 {
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fmul_v3f64:
@@ -621,6 +624,7 @@ define <3 x double> @constrained_vector_fadd_v3f64() #0 {
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fadd_v3f64:
@@ -760,6 +764,7 @@ define <3 x double> @constrained_vector_fsub_v3f64() #0 {
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fsub_v3f64:
@@ -892,6 +897,7 @@ define <3 x double> @constrained_vector_sqrt_v3f64() #0 {
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f64:
@@ -1077,6 +1083,7 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
; CHECK-NEXT: callq pow
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -1333,6 +1340,7 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 {
; CHECK-NEXT: callq __powidf2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -1570,6 +1578,7 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 {
; CHECK-NEXT: callq sin
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -1794,6 +1803,7 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 {
; CHECK-NEXT: callq cos
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -2018,6 +2028,7 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 {
; CHECK-NEXT: callq exp
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -2242,6 +2253,7 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 {
; CHECK-NEXT: callq exp2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -2466,6 +2478,7 @@ define <3 x double> @constrained_vector_log_v3f64() #0 {
; CHECK-NEXT: callq log
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -2690,6 +2703,7 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 {
; CHECK-NEXT: callq log10
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -2914,6 +2928,7 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 {
; CHECK-NEXT: callq log2
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -3116,6 +3131,7 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 {
; CHECK-NEXT: callq rint
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -3286,6 +3302,7 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; CHECK-NEXT: callq nearbyint
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -3492,6 +3509,7 @@ define <3 x double> @constrained_vector_max_v3f64() #0 {
; CHECK-NEXT: callq fmax
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -3742,6 +3760,7 @@ define <3 x double> @constrained_vector_min_v3f64() #0 {
; CHECK-NEXT: callq fmin
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -5549,6 +5568,7 @@ define <3 x double> @constrained_vector_fpext_v3f32() #0 {
; CHECK-NEXT: cvtss2sd %xmm2, %xmm2
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v3f32:
@@ -5694,6 +5714,7 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; CHECK-NEXT: callq ceil
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -5822,6 +5843,7 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 {
; CHECK-NEXT: callq floor
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -5972,6 +5994,7 @@ define <3 x double> @constrained_vector_round_v3f64() #0 {
; CHECK-NEXT: callq round
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -6112,6 +6135,7 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; CHECK-NEXT: callq trunc
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
@@ -6334,6 +6358,7 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; CHECK-NEXT: cvtsi2sd %eax, %xmm0
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movapd %xmm2, %xmm0
; CHECK-NEXT: retq
;
@@ -6401,6 +6426,7 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: cvtsi2sd %rdx, %xmm2
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64:
@@ -6988,6 +7014,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; CHECK-NEXT: cvtsi2sd %rax, %xmm0
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: movapd %xmm2, %xmm0
; CHECK-NEXT: retq
;
@@ -7096,6 +7123,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-NEXT: addpd %xmm4, %xmm2
; CHECK-NEXT: movlpd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64:
More information about the llvm-commits mailing list