[llvm] r353489 - [X86] Add FPCW as a register and start using it as an implicit use on floating point instructions.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 7 16:44:39 PST 2019


Author: ctopper
Date: Thu Feb  7 16:44:39 2019
New Revision: 353489

URL: http://llvm.org/viewvc/llvm-project?rev=353489&view=rev
Log:
[X86] Add FPCW as a register and start using it as an implicit use on floating point instructions.

Summary:
FPCW contains the rounding mode control which we manipulate to implement fp to integer conversion by changing the roudning mode, storing the value to the stack, and then changing the rounding mode back. Because we didn't model FPCW and its dependency chain, other instructions could be scheduled into the middle of the sequence.

This patch introduces the register and adds it as an implciit def of FLDCW and implicit use of the FP binary arithmetic instructions and store instructions. There are more instructions that need to be updated, but this is a good start. I believe this fixes at least the reduced test case from PR40529.

Reviewers: RKSimon, spatel, rnk, efriedma, andrew.w.kaylor

Subscribers: dim, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D57735

Modified:
    llvm/trunk/lib/Target/X86/X86InstrFPStack.td
    llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
    llvm/trunk/lib/Target/X86/X86RegisterInfo.td
    llvm/trunk/test/CodeGen/MIR/X86/memory-operands.mir
    llvm/trunk/test/CodeGen/X86/ipra-reg-usage.ll
    llvm/trunk/test/CodeGen/X86/pr34080.ll
    llvm/trunk/test/CodeGen/X86/pr40529.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=353489&r1=353488&r2=353489&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Thu Feb  7 16:44:39 2019
@@ -229,7 +229,7 @@ def _FI32m  : FPI<0xDA, fp, (outs), (ins
 } // mayLoad = 1, hasSideEffects = 1
 }
 
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
 // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
 // resources.
 let hasNoSchedulingInfo = 1 in {
@@ -266,7 +266,7 @@ class FPrST0PInst<Format fp, string asm>
 // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
 // of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
 // we have to put some 'r's in and take them out of weird places.
-let SchedRW = [WriteFAdd], Defs = [FPSW] in {
+let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in {
 def ADD_FST0r   : FPST0rInst <MRM0r, "fadd\t{$op, %st|st, $op}">;
 def ADD_FrST0   : FPrST0Inst <MRM0r, "fadd\t{%st, $op|$op, st}">;
 def ADD_FPrST0  : FPrST0PInst<MRM0r, "faddp\t{%st, $op|$op, st}">;
@@ -277,16 +277,16 @@ def SUB_FST0r   : FPST0rInst <MRM4r, "fs
 def SUBR_FrST0  : FPrST0Inst <MRM4r, "fsub{|r}\t{%st, $op|$op, st}">;
 def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t{%st, $op|$op, st}">;
 } // SchedRW
-let SchedRW = [WriteFCom], Defs = [FPSW] in {
+let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in {
 def COM_FST0r   : FPST0rInst <MRM2r, "fcom\t$op">;
 def COMP_FST0r  : FPST0rInst <MRM3r, "fcomp\t$op">;
 } // SchedRW
-let SchedRW = [WriteFMul], Defs = [FPSW] in {
+let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in {
 def MUL_FST0r   : FPST0rInst <MRM1r, "fmul\t{$op, %st|st, $op}">;
 def MUL_FrST0   : FPrST0Inst <MRM1r, "fmul\t{%st, $op|$op, st}">;
 def MUL_FPrST0  : FPrST0PInst<MRM1r, "fmulp\t{%st, $op|$op, st}">;
 } // SchedRW
-let SchedRW = [WriteFDiv], Defs = [FPSW] in {
+let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in {
 def DIVR_FST0r  : FPST0rInst <MRM7r, "fdivr\t{$op, %st|st, $op}">;
 def DIV_FrST0   : FPrST0Inst <MRM7r, "fdiv{r}\t{%st, $op|$op, st}">;
 def DIV_FPrST0  : FPrST0PInst<MRM7r, "fdiv{r}p\t{%st, $op|$op, st}">;
@@ -306,7 +306,7 @@ def _Fp80  : FpI_<(outs RFP80:$dst), (in
 def _F     : FPI<0xD9, fp, (outs), (ins), asmstring>;
 }
 
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
 
 let SchedRW = [WriteFSign] in {
 defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
@@ -334,7 +334,7 @@ def TST_F  : FPI<0xD9, MRM_E4, (outs), (
 
 // Versions of FP instructions that take a single memory operand.  Added for the
 //   disassembler; remove as they are included with patterns elsewhere.
-let SchedRW = [WriteFComLd], Defs = [FPSW] in {
+let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in {
 def FCOM32m  : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
 def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
 
@@ -453,7 +453,7 @@ def ILD_Fp64m80: FpI_<(outs RFP80:$dst),
                   [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
 } // SchedRW
 
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStore], Uses = [FPCW] in {
 def ST_Fp32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
                   [(store RFP32:$src, addr:$op)]>;
 def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
@@ -488,7 +488,7 @@ def IST_Fp16m80  : FpI_<(outs), (ins i16
 def IST_Fp32m80  : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp64m80  : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
 } // mayStore
-} // SchedRW
+} // SchedRW, Uses = [FPCW]
 
 let mayLoad = 1, SchedRW = [WriteLoad] in {
 def LD_F32m   : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
@@ -498,7 +498,7 @@ def ILD_F16m  : FPI<0xDF, MRM0m, (outs),
 def ILD_F32m  : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
 def ILD_F64m  : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
 }
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
 def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
 def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
 def ST_FP32m  : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
@@ -512,7 +512,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs),
 }
 
 // FISTTP requires SSE3 even though it's a FPStack op.
-let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
+let Predicates = [HasSSE3], SchedRW = [WriteStore], Uses = [FPCW] in {
 def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
                     [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
 def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
@@ -533,7 +533,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64
                     [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
 } // Predicates = [HasSSE3]
 
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
 def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
 def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
 def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
@@ -569,7 +569,7 @@ def LD_F0 : FPI<0xD9, MRM_EE, (outs), (i
 let SchedRW = [WriteFLD1] in
 def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
 
-let SchedRW = [WriteFLDC], Defs = [FPSW] in {
+let SchedRW = [WriteFLDC] in {
 def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
 def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
 def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
@@ -578,7 +578,7 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (in
 } // SchedRW
 
 // Floating point compares.
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], Uses = [FPCW] in {
 def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                         [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
 def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@@ -590,19 +590,19 @@ def UCOM_Fpr80 : FpI_  <(outs), (ins RFP
 
 let SchedRW = [WriteFCom] in {
 // CC = ST(0) cmp ST(i)
-let Defs = [EFLAGS, FPSW] in {
-let Predicates = [FPStackf32, HasCMov] in
-def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
-let Predicates = [FPStackf64, HasCMov] in
-def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
-let Predicates = [HasCMov] in
+let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
+def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>,
+                  Requires<[FPStackf32, HasCMov]>;
+def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>,
+                  Requires<[FPStackf64, HasCMov]>;
 def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>,
+                  Requires<[HasCMov]>;
 }
 
-let Defs = [FPSW], Uses = [ST0] in {
+let Defs = [FPSW], Uses = [ST0, FPCW] in {
 def UCOM_Fr    : FPI<0xDD, MRM4r,    // FPSW = cmp ST(0) with ST(i)
                     (outs), (ins RSTi:$reg), "fucom\t$reg">;
 def UCOM_FPr   : FPI<0xDD, MRM5r,    // FPSW = cmp ST(0) with ST(i), pop
@@ -611,14 +611,12 @@ def UCOM_FPPr  : FPI<0xDA, MRM_E9,
                     (outs), (ins), "fucompp">;
 }
 
-let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
+let Defs = [EFLAGS, FPSW], Uses = [ST0, FPCW] in {
 def UCOM_FIr   : FPI<0xDB, MRM5r,     // CC = cmp ST(0) with ST(i)
                     (outs), (ins RSTi:$reg), "fucomi\t{$reg, %st|st, $reg}">;
 def UCOM_FIPr  : FPI<0xDF, MRM5r,     // CC = cmp ST(0) with ST(i), pop
                     (outs), (ins RSTi:$reg), "fucompi\t{$reg, %st|st, $reg}">;
-}
 
-let Defs = [EFLAGS, FPSW] in {
 def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RSTi:$reg),
                   "fcomi\t{$reg, %st|st, $reg}">;
 def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),
@@ -632,12 +630,12 @@ let Defs = [AX], Uses = [FPSW] in
 def FNSTSW16r : I<0xDF, MRM_E0,                  // AX = fp flags
                   (outs), (ins), "fnstsw\t{%ax|ax}",
                   [(set AX, (X86fp_stsw FPSW))]>;
-let Defs = [FPSW] in
+let Defs = [FPSW], Uses = [FPCW] in
 def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control world
                   (outs), (ins i16mem:$dst), "fnstcw\t$dst",
                   [(X86fp_cwd_get16 addr:$dst)]>;
 } // SchedRW
-let Defs = [FPSW], mayLoad = 1 in
+let Defs = [FPSW,FPCW], mayLoad = 1 in
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
                   (outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
                 Sched<[WriteLoad]>;

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=353489&r1=353488&r2=353489&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Thu Feb  7 16:44:39 2019
@@ -496,6 +496,9 @@ BitVector X86RegisterInfo::getReservedRe
   BitVector Reserved(getNumRegs());
   const X86FrameLowering *TFI = getFrameLowering(MF);
 
+  // Set the floating point control register as reserved.
+  Reserved.set(X86::FPCW);
+
   // Set the stack-pointer register and its aliases as reserved.
   for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
        ++I)

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=353489&r1=353488&r2=353489&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Thu Feb  7 16:44:39 2019
@@ -289,6 +289,9 @@ def ST7 : X86Reg<"st(7)", 7>, DwarfRegNu
 // Floating-point status word
 def FPSW : X86Reg<"fpsr", 0>;
 
+// Floating-point control word
+def FPCW : X86Reg<"fpcr", 0>;
+
 // Status flags register.
 //
 // Note that some flags that are commonly thought of as part of the status

Modified: llvm/trunk/test/CodeGen/MIR/X86/memory-operands.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/X86/memory-operands.mir?rev=353489&r1=353488&r2=353489&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/X86/memory-operands.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/X86/memory-operands.mir Thu Feb  7 16:44:39 2019
@@ -359,8 +359,8 @@ body: |
     CFI_INSTRUCTION def_cfa_offset 32
     LD_F80m $rsp, 1, $noreg, 32, $noreg, implicit-def dead $fpsw
   ; CHECK: name: stack_psv
-  ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw :: (store 10 into stack, align 16)
-    ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw :: (store 10 into stack, align 16)
+  ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16)
+    ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16)
     CALL64pcrel32 &cosl, csr_64, implicit $rsp, implicit-def $rsp, implicit-def $fp0
     $rsp = ADD64ri8 $rsp, 24, implicit-def dead $eflags
     RETQ

Modified: llvm/trunk/test/CodeGen/X86/ipra-reg-usage.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ipra-reg-usage.ll?rev=353489&r1=353488&r2=353489&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ipra-reg-usage.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ipra-reg-usage.ll Thu Feb  7 16:44:39 2019
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-unknown"
 declare void @bar1()
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh
   call void @bar1()
   call void @bar2()
   ret void

Modified: llvm/trunk/test/CodeGen/X86/pr34080.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34080.ll?rev=353489&r1=353488&r2=353489&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr34080.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr34080.ll Thu Feb  7 16:44:39 2019
@@ -65,12 +65,12 @@ define void @_Z1fe(x86_fp80 %z) local_un
 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -64(%rbp)
 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -32(%rbp)
 ; SSE2-SCHEDULE-NEXT:    fsubl -32(%rbp)
-; SSE2-SCHEDULE-NEXT:    fnstcw -2(%rbp)
 ; SSE2-SCHEDULE-NEXT:    flds {{.*}}(%rip)
+; SSE2-SCHEDULE-NEXT:    fnstcw -2(%rbp)
+; SSE2-SCHEDULE-NEXT:    fmul %st, %st(1)
 ; SSE2-SCHEDULE-NEXT:    movzwl -2(%rbp), %eax
 ; SSE2-SCHEDULE-NEXT:    movw $3199, -2(%rbp) ## imm = 0xC7F
 ; SSE2-SCHEDULE-NEXT:    fldcw -2(%rbp)
-; SSE2-SCHEDULE-NEXT:    fmul %st, %st(1)
 ; SSE2-SCHEDULE-NEXT:    movw %ax, -2(%rbp)
 ; SSE2-SCHEDULE-NEXT:    fxch %st(1)
 ; SSE2-SCHEDULE-NEXT:    fistl -12(%rbp)

Modified: llvm/trunk/test/CodeGen/X86/pr40529.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr40529.ll?rev=353489&r1=353488&r2=353489&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr40529.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr40529.ll Thu Feb  7 16:44:39 2019
@@ -15,12 +15,12 @@ define x86_fp80 @rem_pio2l_min(x86_fp80
 ; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fisubl -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    flds {{.*}}(%rip)
+; CHECK-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fmul %st, %st(1)
 ; CHECK-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
 ; CHECK-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    fmul %st, %st(1)
 ; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fxch %st(1)
 ; CHECK-NEXT:    fistl -{{[0-9]+}}(%rsp)




More information about the llvm-commits mailing list