[llvm] [X86] Resolve FIXME: Add FPCW as a rounding control register (PR #82452)

Wed Feb 21 12:33:57 PST 2024

https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/82452

>From efefd6f62d48c0f91e35c056e324ad87b3d9287a Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Wed, 21 Feb 2024 14:53:23 -0500
Subject: [PATCH 1/3] Insert wait if instruction right before call is a waiting
 one

The reason adding fpcr broke tests is because that caused LLVM to no longer kill the instruction before a call, which prevented LLVM from treating x87 as an operand, which meant the call was not eligible for a wait before it as a result.

This patch now has LLVM add wait to the end of a x87 instruction is a call is immediately after.
---
 llvm/lib/Target/X86/X86InsertWait.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp
index 69a3d32a931498..10f141abe9c786 100644
--- a/llvm/lib/Target/X86/X86InsertWait.cpp
+++ b/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -115,7 +115,8 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
       // If the following instruction is an X87 instruction and isn't an X87
       // non-waiting control instruction, we can omit insert wait instruction.
       MachineBasicBlock::iterator AfterMI = std::next(MI);
-      if (AfterMI != MBB.end() && X86::isX87Instruction(*AfterMI) &&
+      if (AfterMI != MBB.end() && !AfterMI->isCall() &&
+          X86::isX87Instruction(*AfterMI) &&
           !isX87NonWaitingControlInstruction(*AfterMI))
         continue;
 

>From cb1f41f81be005d60497658d8558c3127fec6053 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Tue, 20 Feb 2024 20:41:40 -0500
Subject: [PATCH 2/3] Update X86InsertWait.cpp

[X86] Resolve FIXME: Add FPCW as a rounding control register

To prevent tests from breaking, another fix had to be made: Now, we check if the instruction after a waiting instruction is a call, and if so, we insert the wait.
---
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp   |  4 +-
 llvm/lib/Target/X86/X86RegisterInfo.td        |  6 +--
 llvm/test/CodeGen/X86/fp-intrinsics.ll        | 19 --------
 .../CodeGen/X86/fp-strict-libcalls-msvc32.ll  |  9 ----
 llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll |  1 -
 llvm/test/CodeGen/X86/fp-strict-scalar.ll     |  2 -
 llvm/test/CodeGen/X86/fp128-cast-strict.ll    |  4 +-
 llvm/test/CodeGen/X86/fp80-strict-libcalls.ll | 48 -------------------
 llvm/test/CodeGen/X86/half-constrained.ll     |  7 ---
 llvm/test/CodeGen/X86/ldexp-f80.ll            |  1 -
 llvm/test/CodeGen/X86/vec-strict-128.ll       |  2 -
 11 files changed, 5 insertions(+), 98 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index be8275c92e11ae..c7ef11aede886a 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -670,9 +670,7 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
 }
 
 ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
-  // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
-  // tests at the moment, which is not what we expected.
-  static const MCPhysReg RCRegs[] = {X86::MXCSR};
+  static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
   return RCRegs;
 }
 
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 166024bf3b53fe..e9b86361bbd8ec 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -445,12 +445,12 @@ def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
 def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
 def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
 
-// Floating-point status word
-def FPSW : X86Reg<"fpsr", 0>;
-
 // Floating-point control word
 def FPCW : X86Reg<"fpcr", 0>;
 
+// Floating-point status word
+def FPSW : X86Reg<"fpsr", 0>;
+
 // SIMD Floating-point control register.
 // Note: We only model the "Uses" of the control bits: current rounding modes,
 // DAZ, FTZ and exception masks. We don't model the "Defs" of flag bits.
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index d2b45ee1e03e63..1387cac7fe1a70 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -301,7 +301,6 @@ define double @f6() #0 {
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll pow
 ; X87-NEXT:    addl $28, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -413,7 +412,6 @@ define double @f8() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll sin
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -464,7 +462,6 @@ define double @f9() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll cos
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -515,7 +512,6 @@ define double @f10() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll exp
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -566,7 +562,6 @@ define double @f11() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll exp2
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -617,7 +612,6 @@ define double @f12() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll log
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -668,7 +662,6 @@ define double @f13() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll log10
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -719,7 +712,6 @@ define double @f14() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll log2
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -770,7 +762,6 @@ define double @f15() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll rint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -818,7 +809,6 @@ define double @f16() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll nearbyint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -867,7 +857,6 @@ define double @f19() #0 {
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fld1
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll fmod
 ; X87-NEXT:    addl $28, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1607,7 +1596,6 @@ define i32 @f23(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll lrint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1655,7 +1643,6 @@ define i32 @f24(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll lrintf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1703,7 +1690,6 @@ define i64 @f25(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll llrint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1751,7 +1737,6 @@ define i64 @f26(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll llrintf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1799,7 +1784,6 @@ define i32 @f27(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll lround
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1846,7 +1830,6 @@ define i32 @f28(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll lroundf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1893,7 +1876,6 @@ define i64 @f29(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll llround
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1940,7 +1922,6 @@ define i64 @f30(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll llroundf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
index 1bc308bef8cccf..c9dd8a7374569f 100644
--- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
@@ -7,7 +7,6 @@ define float @ceil(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _ceil
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -24,7 +23,6 @@ define float @cos(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _cos
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -41,7 +39,6 @@ define float @exp(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _exp
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -58,7 +55,6 @@ define float @floor(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _floor
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -78,7 +74,6 @@ define float @frem(float %x, float %y) #0 {
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _fmod
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -95,7 +90,6 @@ define float @log(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _log
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -112,7 +106,6 @@ define float @log10(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _log10
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -132,7 +125,6 @@ define float @pow(float %x, float %y) #0 {
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _pow
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -149,7 +141,6 @@ define float @sin(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
-; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _sin
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
index cb1876fee05aea..47a83e9ae581a2 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
@@ -4186,7 +4186,6 @@ define void @foo(float %0, float %1) #0 {
 ; X87-CMOV-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-CMOV-NEXT:    fucompi %st(1), %st
 ; X87-CMOV-NEXT:    fstp %st(0)
-; X87-CMOV-NEXT:    wait
 ; X87-CMOV-NEXT:    ja bar # TAILCALL
 ; X87-CMOV-NEXT:  # %bb.1:
 ; X87-CMOV-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
index f1be74f5c3ac4c..5939bb4189b7ab 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
@@ -660,7 +660,6 @@ define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll fma
 ; X87-NEXT:    addl $24, %esp
 ; X87-NEXT:    retl
@@ -717,7 +716,6 @@ define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
 ; X87-NEXT:    fstps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
-; X87-NEXT:    wait
 ; X87-NEXT:    calll fmaf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
index f141153d059acb..a7cc2ae49e005f 100644
--- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -28,7 +28,7 @@ define dso_local void @TestFPExtF16_F128() nounwind strictfp {
 ; X64-AVX512-LABEL: TestFPExtF16_F128:
 ; X64-AVX512:       # %bb.0: # %entry
 ; X64-AVX512-NEXT:    pushq %rax
-; X64-AVX512-NEXT:    vmovsh vf16(%rip), %xmm0
+; X64-AVX512-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
 ; X64-AVX512-NEXT:    callq __extendhftf2 at PLT
 ; X64-AVX512-NEXT:    vmovaps %xmm0, vf128(%rip)
 ; X64-AVX512-NEXT:    popq %rax
@@ -167,7 +167,6 @@ define dso_local void @TestFPExtF80_F128() nounwind strictfp {
 ; X64-SSE-NEXT:    subq $24, %rsp
 ; X64-SSE-NEXT:    fldt vf80(%rip)
 ; X64-SSE-NEXT:    fstpt (%rsp)
-; X64-SSE-NEXT:    wait
 ; X64-SSE-NEXT:    callq __extendxftf2 at PLT
 ; X64-SSE-NEXT:    movaps %xmm0, vf128(%rip)
 ; X64-SSE-NEXT:    addq $24, %rsp
@@ -178,7 +177,6 @@ define dso_local void @TestFPExtF80_F128() nounwind strictfp {
 ; X64-AVX-NEXT:    subq $24, %rsp
 ; X64-AVX-NEXT:    fldt vf80(%rip)
 ; X64-AVX-NEXT:    fstpt (%rsp)
-; X64-AVX-NEXT:    wait
 ; X64-AVX-NEXT:    callq __extendxftf2 at PLT
 ; X64-AVX-NEXT:    vmovaps %xmm0, vf128(%rip)
 ; X64-AVX-NEXT:    addq $24, %rsp
diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
index 4d50b15e5c185b..9462acd53c4392 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
@@ -12,7 +12,6 @@ define x86_fp80 @fma(x86_fp80 %x, x86_fp80 %y, x86_fp80 %z) nounwind strictfp {
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll fmal
 ; X86-NEXT:    addl $36, %esp
 ; X86-NEXT:    retl
@@ -26,7 +25,6 @@ define x86_fp80 @fma(x86_fp80 %x, x86_fp80 %y, x86_fp80 %z) nounwind strictfp {
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq fmal at PLT
 ; X64-NEXT:    addq $56, %rsp
 ; X64-NEXT:    retq
@@ -43,7 +41,6 @@ define x86_fp80 @frem(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll fmodl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -55,7 +52,6 @@ define x86_fp80 @frem(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq fmodl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -70,7 +66,6 @@ define x86_fp80 @ceil(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll ceill
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -80,7 +75,6 @@ define x86_fp80 @ceil(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq ceill at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -95,7 +89,6 @@ define x86_fp80 @cos(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll cosl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -105,7 +98,6 @@ define x86_fp80 @cos(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq cosl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -120,7 +112,6 @@ define x86_fp80 @exp(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll expl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -130,7 +121,6 @@ define x86_fp80 @exp(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq expl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -145,7 +135,6 @@ define x86_fp80 @exp2(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll exp2l
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -155,7 +144,6 @@ define x86_fp80 @exp2(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq exp2l at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -170,7 +158,6 @@ define x86_fp80 @floor(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll floorl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -180,7 +167,6 @@ define x86_fp80 @floor(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq floorl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -195,7 +181,6 @@ define x86_fp80 @log(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll logl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -205,7 +190,6 @@ define x86_fp80 @log(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq logl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -220,7 +204,6 @@ define x86_fp80 @log10(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll log10l
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -230,7 +213,6 @@ define x86_fp80 @log10(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq log10l at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -245,7 +227,6 @@ define x86_fp80 @log2(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll log2l
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -255,7 +236,6 @@ define x86_fp80 @log2(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq log2l at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -272,7 +252,6 @@ define x86_fp80 @maxnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll fmaxl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -284,7 +263,6 @@ define x86_fp80 @maxnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq fmaxl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -301,7 +279,6 @@ define x86_fp80 @minnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll fminl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -313,7 +290,6 @@ define x86_fp80 @minnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq fminl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -328,7 +304,6 @@ define x86_fp80 @nearbyint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll nearbyintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -338,7 +313,6 @@ define x86_fp80 @nearbyint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq nearbyintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -355,7 +329,6 @@ define x86_fp80 @pow(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll powl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -367,7 +340,6 @@ define x86_fp80 @pow(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq powl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -385,7 +357,6 @@ define x86_fp80 @powi(x86_fp80 %x, i32 %y) nounwind strictfp {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll __powixf2
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    retl
@@ -395,7 +366,6 @@ define x86_fp80 @powi(x86_fp80 %x, i32 %y) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq __powixf2 at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -410,7 +380,6 @@ define x86_fp80 @rint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll rintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -420,7 +389,6 @@ define x86_fp80 @rint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq rintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -435,7 +403,6 @@ define x86_fp80 @round(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll roundl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -445,7 +412,6 @@ define x86_fp80 @round(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq roundl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -460,7 +426,6 @@ define x86_fp80 @roundeven(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll roundevenl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -470,7 +435,6 @@ define x86_fp80 @roundeven(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq roundevenl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -485,7 +449,6 @@ define x86_fp80 @sin(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll sinl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -495,7 +458,6 @@ define x86_fp80 @sin(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq sinl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -510,7 +472,6 @@ define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll truncl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -520,7 +481,6 @@ define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq truncl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -535,7 +495,6 @@ define i32 @lrint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll lrintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -545,7 +504,6 @@ define i32 @lrint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq lrintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -560,7 +518,6 @@ define i64 @llrint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll llrintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -570,7 +527,6 @@ define i64 @llrint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq llrintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -585,7 +541,6 @@ define i32 @lround(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll lroundl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -595,7 +550,6 @@ define i32 @lround(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq lroundl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -610,7 +564,6 @@ define i64 @llround(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
-; X86-NEXT:    wait
 ; X86-NEXT:    calll llroundl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -620,7 +573,6 @@ define i64 @llround(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq llroundl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/half-constrained.ll b/llvm/test/CodeGen/X86/half-constrained.ll
index b4db61a4a4a342..bf80fc496d594e 100644
--- a/llvm/test/CodeGen/X86/half-constrained.ll
+++ b/llvm/test/CodeGen/X86/half-constrained.ll
@@ -165,7 +165,6 @@ define void @float_to_half(float %0) strictfp {
 ; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NOF16C-NEXT:    flds {{[0-9]+}}(%esp)
 ; X32-NOF16C-NEXT:    fstps (%esp)
-; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __gnu_f2h_ieee
 ; X32-NOF16C-NEXT:    movw %ax, a
 ; X32-NOF16C-NEXT:    addl $12, %esp
@@ -213,7 +212,6 @@ define void @double_to_half(double %0) strictfp {
 ; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NOF16C-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X32-NOF16C-NEXT:    fstpl (%esp)
-; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __truncdfhf2
 ; X32-NOF16C-NEXT:    movw %ax, a
 ; X32-NOF16C-NEXT:    addl $12, %esp
@@ -266,7 +264,6 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NOF16C-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X32-NOF16C-NEXT:    fstpt (%esp)
-; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __truncxfhf2
 ; X32-NOF16C-NEXT:    movw %ax, a
 ; X32-NOF16C-NEXT:    addl $12, %esp
@@ -279,7 +276,6 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-F16C-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X32-F16C-NEXT:    fstpt (%esp)
-; X32-F16C-NEXT:    wait
 ; X32-F16C-NEXT:    calll __truncxfhf2
 ; X32-F16C-NEXT:    vpextrw $0, %xmm0, a
 ; X32-F16C-NEXT:    addl $12, %esp
@@ -292,7 +288,6 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 32
 ; X64-NOF16C-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NOF16C-NEXT:    fstpt (%rsp)
-; X64-NOF16C-NEXT:    wait
 ; X64-NOF16C-NEXT:    callq __truncxfhf2 at PLT
 ; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
 ; X64-NOF16C-NEXT:    movq a at GOTPCREL(%rip), %rcx
@@ -307,7 +302,6 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X64-F16C-NEXT:    .cfi_def_cfa_offset 32
 ; X64-F16C-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-F16C-NEXT:    fstpt (%rsp)
-; X64-F16C-NEXT:    wait
 ; X64-F16C-NEXT:    callq __truncxfhf2 at PLT
 ; X64-F16C-NEXT:    movq a at GOTPCREL(%rip), %rax
 ; X64-F16C-NEXT:    vpextrw $0, %xmm0, (%rax)
@@ -335,7 +329,6 @@ define void @add() strictfp {
 ; X32-NOF16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X32-NOF16C-NEXT:    faddp %st, %st(1)
 ; X32-NOF16C-NEXT:    fstps (%esp)
-; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __gnu_f2h_ieee
 ; X32-NOF16C-NEXT:    movw %ax, c
 ; X32-NOF16C-NEXT:    addl $12, %esp
diff --git a/llvm/test/CodeGen/X86/ldexp-f80.ll b/llvm/test/CodeGen/X86/ldexp-f80.ll
index 3a10eab2f47cbb..6d733f602fc55c 100644
--- a/llvm/test/CodeGen/X86/ldexp-f80.ll
+++ b/llvm/test/CodeGen/X86/ldexp-f80.ll
@@ -26,7 +26,6 @@ define x86_fp80 @test_strict_ldexp_f80_i32(ptr addrspace(1) %out, x86_fp80 %a, i
 ; X64-NEXT:    movl %esi, %edi
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
-; X64-NEXT:    wait
 ; X64-NEXT:    callq ldexpl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
index 84c0ff61cf9b7d..ee00f01af1a3d4 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -271,7 +271,6 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
 ; SSE-X86-NEXT:    fstps {{[0-9]+}}(%esp)
 ; SSE-X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
 ; SSE-X86-NEXT:    fstps {{[0-9]+}}(%esp)
-; SSE-X86-NEXT:    wait
 ; SSE-X86-NEXT:    calll fmaf
 ; SSE-X86-NEXT:    fstps {{[0-9]+}}(%esp)
 ; SSE-X86-NEXT:    wait
@@ -363,7 +362,6 @@ define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
 ; SSE-X86-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
 ; SSE-X86-NEXT:    movhps %xmm0, (%esp)
 ; SSE-X86-NEXT:    fstpl {{[0-9]+}}(%esp)
-; SSE-X86-NEXT:    wait
 ; SSE-X86-NEXT:    calll fma
 ; SSE-X86-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; SSE-X86-NEXT:    wait

>From 9ae8e93467175e6c23936e3d84e643072127f1d7 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Wed, 21 Feb 2024 15:33:24 -0500
Subject: [PATCH 3/3] Fix

---
 llvm/test/CodeGen/X86/fp-intrinsics.ll        | 19 ++++++++
 .../CodeGen/X86/fp-strict-libcalls-msvc32.ll  |  9 ++++
 llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll |  1 +
 llvm/test/CodeGen/X86/fp-strict-scalar.ll     |  2 +
 llvm/test/CodeGen/X86/fp128-cast-strict.ll    |  2 +
 llvm/test/CodeGen/X86/fp80-strict-libcalls.ll | 48 +++++++++++++++++++
 llvm/test/CodeGen/X86/half-constrained.ll     |  7 +++
 llvm/test/CodeGen/X86/ldexp-f80.ll            |  1 +
 llvm/test/CodeGen/X86/vec-strict-128.ll       |  2 +
 9 files changed, 91 insertions(+)

diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index 1387cac7fe1a70..d2b45ee1e03e63 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -301,6 +301,7 @@ define double @f6() #0 {
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll pow
 ; X87-NEXT:    addl $28, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -412,6 +413,7 @@ define double @f8() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll sin
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -462,6 +464,7 @@ define double @f9() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll cos
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -512,6 +515,7 @@ define double @f10() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll exp
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -562,6 +566,7 @@ define double @f11() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll exp2
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -612,6 +617,7 @@ define double @f12() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll log
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -662,6 +668,7 @@ define double @f13() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll log10
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -712,6 +719,7 @@ define double @f14() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll log2
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -762,6 +770,7 @@ define double @f15() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll rint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -809,6 +818,7 @@ define double @f16() #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{\.?LCPI[0-9]+_[0-9]+}}
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll nearbyint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -857,6 +867,7 @@ define double @f19() #0 {
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fld1
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll fmod
 ; X87-NEXT:    addl $28, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1596,6 +1607,7 @@ define i32 @f23(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll lrint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1643,6 +1655,7 @@ define i32 @f24(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll lrintf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1690,6 +1703,7 @@ define i64 @f25(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll llrint
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1737,6 +1751,7 @@ define i64 @f26(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll llrintf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1784,6 +1799,7 @@ define i32 @f27(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll lround
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1830,6 +1846,7 @@ define i32 @f28(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll lroundf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1876,6 +1893,7 @@ define i64 @f29(double %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll llround
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
@@ -1922,6 +1940,7 @@ define i64 @f30(float %x) #0 {
 ; X87-NEXT:    .cfi_def_cfa_offset 16
 ; X87-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll llroundf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
index c9dd8a7374569f..1bc308bef8cccf 100644
--- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
@@ -7,6 +7,7 @@ define float @ceil(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _ceil
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -23,6 +24,7 @@ define float @cos(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _cos
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -39,6 +41,7 @@ define float @exp(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _exp
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -55,6 +58,7 @@ define float @floor(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _floor
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -74,6 +78,7 @@ define float @frem(float %x, float %y) #0 {
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _fmod
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -90,6 +95,7 @@ define float @log(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _log
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -106,6 +112,7 @@ define float @log10(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _log10
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -125,6 +132,7 @@ define float @pow(float %x, float %y) #0 {
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _pow
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
@@ -141,6 +149,7 @@ define float @sin(float %x) #0 {
 ; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    calll _sin
 ; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
index 47a83e9ae581a2..cb1876fee05aea 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll
@@ -4186,6 +4186,7 @@ define void @foo(float %0, float %1) #0 {
 ; X87-CMOV-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87-CMOV-NEXT:    fucompi %st(1), %st
 ; X87-CMOV-NEXT:    fstp %st(0)
+; X87-CMOV-NEXT:    wait
 ; X87-CMOV-NEXT:    ja bar # TAILCALL
 ; X87-CMOV-NEXT:  # %bb.1:
 ; X87-CMOV-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
index 5939bb4189b7ab..f1be74f5c3ac4c 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
@@ -660,6 +660,7 @@ define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstpl (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll fma
 ; X87-NEXT:    addl $24, %esp
 ; X87-NEXT:    retl
@@ -716,6 +717,7 @@ define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
 ; X87-NEXT:    fstps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fstps (%esp)
+; X87-NEXT:    wait
 ; X87-NEXT:    calll fmaf
 ; X87-NEXT:    addl $12, %esp
 ; X87-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
index a7cc2ae49e005f..33d18848acae0d 100644
--- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -167,6 +167,7 @@ define dso_local void @TestFPExtF80_F128() nounwind strictfp {
 ; X64-SSE-NEXT:    subq $24, %rsp
 ; X64-SSE-NEXT:    fldt vf80(%rip)
 ; X64-SSE-NEXT:    fstpt (%rsp)
+; X64-SSE-NEXT:    wait
 ; X64-SSE-NEXT:    callq __extendxftf2 at PLT
 ; X64-SSE-NEXT:    movaps %xmm0, vf128(%rip)
 ; X64-SSE-NEXT:    addq $24, %rsp
@@ -177,6 +178,7 @@ define dso_local void @TestFPExtF80_F128() nounwind strictfp {
 ; X64-AVX-NEXT:    subq $24, %rsp
 ; X64-AVX-NEXT:    fldt vf80(%rip)
 ; X64-AVX-NEXT:    fstpt (%rsp)
+; X64-AVX-NEXT:    wait
 ; X64-AVX-NEXT:    callq __extendxftf2 at PLT
 ; X64-AVX-NEXT:    vmovaps %xmm0, vf128(%rip)
 ; X64-AVX-NEXT:    addq $24, %rsp
diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
index 9462acd53c4392..4d50b15e5c185b 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
@@ -12,6 +12,7 @@ define x86_fp80 @fma(x86_fp80 %x, x86_fp80 %y, x86_fp80 %z) nounwind strictfp {
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll fmal
 ; X86-NEXT:    addl $36, %esp
 ; X86-NEXT:    retl
@@ -25,6 +26,7 @@ define x86_fp80 @fma(x86_fp80 %x, x86_fp80 %y, x86_fp80 %z) nounwind strictfp {
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq fmal at PLT
 ; X64-NEXT:    addq $56, %rsp
 ; X64-NEXT:    retq
@@ -41,6 +43,7 @@ define x86_fp80 @frem(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll fmodl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -52,6 +55,7 @@ define x86_fp80 @frem(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq fmodl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -66,6 +70,7 @@ define x86_fp80 @ceil(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll ceill
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -75,6 +80,7 @@ define x86_fp80 @ceil(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq ceill at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -89,6 +95,7 @@ define x86_fp80 @cos(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll cosl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -98,6 +105,7 @@ define x86_fp80 @cos(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq cosl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -112,6 +120,7 @@ define x86_fp80 @exp(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll expl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -121,6 +130,7 @@ define x86_fp80 @exp(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq expl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -135,6 +145,7 @@ define x86_fp80 @exp2(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll exp2l
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -144,6 +155,7 @@ define x86_fp80 @exp2(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq exp2l at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -158,6 +170,7 @@ define x86_fp80 @floor(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll floorl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -167,6 +180,7 @@ define x86_fp80 @floor(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq floorl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -181,6 +195,7 @@ define x86_fp80 @log(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll logl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -190,6 +205,7 @@ define x86_fp80 @log(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq logl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -204,6 +220,7 @@ define x86_fp80 @log10(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll log10l
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -213,6 +230,7 @@ define x86_fp80 @log10(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq log10l at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -227,6 +245,7 @@ define x86_fp80 @log2(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll log2l
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -236,6 +255,7 @@ define x86_fp80 @log2(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq log2l at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -252,6 +272,7 @@ define x86_fp80 @maxnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll fmaxl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -263,6 +284,7 @@ define x86_fp80 @maxnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq fmaxl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -279,6 +301,7 @@ define x86_fp80 @minnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll fminl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -290,6 +313,7 @@ define x86_fp80 @minnum(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq fminl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -304,6 +328,7 @@ define x86_fp80 @nearbyint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll nearbyintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -313,6 +338,7 @@ define x86_fp80 @nearbyint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq nearbyintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -329,6 +355,7 @@ define x86_fp80 @pow(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll powl
 ; X86-NEXT:    addl $24, %esp
 ; X86-NEXT:    retl
@@ -340,6 +367,7 @@ define x86_fp80 @pow(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq powl at PLT
 ; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
@@ -357,6 +385,7 @@ define x86_fp80 @powi(x86_fp80 %x, i32 %y) nounwind strictfp {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll __powixf2
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    retl
@@ -366,6 +395,7 @@ define x86_fp80 @powi(x86_fp80 %x, i32 %y) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq __powixf2 at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -380,6 +410,7 @@ define x86_fp80 @rint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll rintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -389,6 +420,7 @@ define x86_fp80 @rint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq rintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -403,6 +435,7 @@ define x86_fp80 @round(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll roundl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -412,6 +445,7 @@ define x86_fp80 @round(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq roundl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -426,6 +460,7 @@ define x86_fp80 @roundeven(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll roundevenl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -435,6 +470,7 @@ define x86_fp80 @roundeven(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq roundevenl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -449,6 +485,7 @@ define x86_fp80 @sin(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll sinl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -458,6 +495,7 @@ define x86_fp80 @sin(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq sinl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -472,6 +510,7 @@ define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll truncl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -481,6 +520,7 @@ define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq truncl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -495,6 +535,7 @@ define i32 @lrint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll lrintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -504,6 +545,7 @@ define i32 @lrint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq lrintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -518,6 +560,7 @@ define i64 @llrint(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll llrintl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -527,6 +570,7 @@ define i64 @llrint(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq llrintl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -541,6 +585,7 @@ define i32 @lround(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll lroundl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -550,6 +595,7 @@ define i32 @lround(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq lroundl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
@@ -564,6 +610,7 @@ define i64 @llround(x86_fp80 %x) nounwind strictfp {
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    wait
 ; X86-NEXT:    calll llroundl
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -573,6 +620,7 @@ define i64 @llround(x86_fp80 %x) nounwind strictfp {
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq llroundl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/half-constrained.ll b/llvm/test/CodeGen/X86/half-constrained.ll
index bf80fc496d594e..b4db61a4a4a342 100644
--- a/llvm/test/CodeGen/X86/half-constrained.ll
+++ b/llvm/test/CodeGen/X86/half-constrained.ll
@@ -165,6 +165,7 @@ define void @float_to_half(float %0) strictfp {
 ; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NOF16C-NEXT:    flds {{[0-9]+}}(%esp)
 ; X32-NOF16C-NEXT:    fstps (%esp)
+; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __gnu_f2h_ieee
 ; X32-NOF16C-NEXT:    movw %ax, a
 ; X32-NOF16C-NEXT:    addl $12, %esp
@@ -212,6 +213,7 @@ define void @double_to_half(double %0) strictfp {
 ; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NOF16C-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X32-NOF16C-NEXT:    fstpl (%esp)
+; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __truncdfhf2
 ; X32-NOF16C-NEXT:    movw %ax, a
 ; X32-NOF16C-NEXT:    addl $12, %esp
@@ -264,6 +266,7 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NOF16C-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X32-NOF16C-NEXT:    fstpt (%esp)
+; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __truncxfhf2
 ; X32-NOF16C-NEXT:    movw %ax, a
 ; X32-NOF16C-NEXT:    addl $12, %esp
@@ -276,6 +279,7 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
 ; X32-F16C-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X32-F16C-NEXT:    fstpt (%esp)
+; X32-F16C-NEXT:    wait
 ; X32-F16C-NEXT:    calll __truncxfhf2
 ; X32-F16C-NEXT:    vpextrw $0, %xmm0, a
 ; X32-F16C-NEXT:    addl $12, %esp
@@ -288,6 +292,7 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 32
 ; X64-NOF16C-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NOF16C-NEXT:    fstpt (%rsp)
+; X64-NOF16C-NEXT:    wait
 ; X64-NOF16C-NEXT:    callq __truncxfhf2 at PLT
 ; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
 ; X64-NOF16C-NEXT:    movq a at GOTPCREL(%rip), %rcx
@@ -302,6 +307,7 @@ define void @fp80_to_half(x86_fp80 %0) strictfp {
 ; X64-F16C-NEXT:    .cfi_def_cfa_offset 32
 ; X64-F16C-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-F16C-NEXT:    fstpt (%rsp)
+; X64-F16C-NEXT:    wait
 ; X64-F16C-NEXT:    callq __truncxfhf2 at PLT
 ; X64-F16C-NEXT:    movq a at GOTPCREL(%rip), %rax
 ; X64-F16C-NEXT:    vpextrw $0, %xmm0, (%rax)
@@ -329,6 +335,7 @@ define void @add() strictfp {
 ; X32-NOF16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X32-NOF16C-NEXT:    faddp %st, %st(1)
 ; X32-NOF16C-NEXT:    fstps (%esp)
+; X32-NOF16C-NEXT:    wait
 ; X32-NOF16C-NEXT:    calll __gnu_f2h_ieee
 ; X32-NOF16C-NEXT:    movw %ax, c
 ; X32-NOF16C-NEXT:    addl $12, %esp
diff --git a/llvm/test/CodeGen/X86/ldexp-f80.ll b/llvm/test/CodeGen/X86/ldexp-f80.ll
index 6d733f602fc55c..3a10eab2f47cbb 100644
--- a/llvm/test/CodeGen/X86/ldexp-f80.ll
+++ b/llvm/test/CodeGen/X86/ldexp-f80.ll
@@ -26,6 +26,7 @@ define x86_fp80 @test_strict_ldexp_f80_i32(ptr addrspace(1) %out, x86_fp80 %a, i
 ; X64-NEXT:    movl %esi, %edi
 ; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    wait
 ; X64-NEXT:    callq ldexpl at PLT
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
index ee00f01af1a3d4..84c0ff61cf9b7d 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -271,6 +271,7 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
 ; SSE-X86-NEXT:    fstps {{[0-9]+}}(%esp)
 ; SSE-X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
 ; SSE-X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; SSE-X86-NEXT:    wait
 ; SSE-X86-NEXT:    calll fmaf
 ; SSE-X86-NEXT:    fstps {{[0-9]+}}(%esp)
 ; SSE-X86-NEXT:    wait
@@ -362,6 +363,7 @@ define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
 ; SSE-X86-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
 ; SSE-X86-NEXT:    movhps %xmm0, (%esp)
 ; SSE-X86-NEXT:    fstpl {{[0-9]+}}(%esp)
+; SSE-X86-NEXT:    wait
 ; SSE-X86-NEXT:    calll fma
 ; SSE-X86-NEXT:    fstpl {{[0-9]+}}(%esp)
 ; SSE-X86-NEXT:    wait