[llvm] [X86] Resolve FIXME: Add FPCW as a rounding control register (PR #82452)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 1 11:14:46 PST 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/82452
>From 3f809108c40915564fbf4faf7a168c12811fbc6e Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Wed, 21 Feb 2024 14:53:23 -0500
Subject: [PATCH 1/2] [X86] Insert wait if instruction right before call is a
waiting one
Adding FPCW as a rounding control register broke tests because it caused LLVM to no longer kill the instruction before a call. That prevented LLVM from treating x87 as an operand, so the call was no longer eligible to have a wait inserted before it.
With this patch, LLVM inserts a wait after an x87 instruction if a call immediately follows it.
---
llvm/lib/Target/X86/X86InsertWait.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp
index 69a3d32a931498..10f141abe9c786 100644
--- a/llvm/lib/Target/X86/X86InsertWait.cpp
+++ b/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -115,7 +115,8 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
// If the following instruction is an X87 instruction and isn't an X87
// non-waiting control instruction, we can omit insert wait instruction.
MachineBasicBlock::iterator AfterMI = std::next(MI);
- if (AfterMI != MBB.end() && X86::isX87Instruction(*AfterMI) &&
+ if (AfterMI != MBB.end() && !AfterMI->isCall() &&
+ X86::isX87Instruction(*AfterMI) &&
!isX87NonWaitingControlInstruction(*AfterMI))
continue;
>From 251e5b32031d0d4ab5153160867d2eabb60d6969 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Tue, 20 Feb 2024 20:41:40 -0500
Subject: [PATCH 2/2] [X86] Resolve FIXME: Add FPCW as a rounding control
register
To keep tests from breaking, another fix was needed: we now check whether the instruction after a waiting instruction is a call, and if so, we insert the wait.
---
llvm/lib/Target/X86/X86FloatingPoint.cpp | 3 +
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 4 +-
llvm/lib/Target/X86/X86InsertWait.cpp | 4 +
llvm/test/CodeGen/X86/pr59305.ll | 89 +++++++++++++++------
4 files changed, 71 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index ca4d03913d093e..5064e75fb44348 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -432,6 +432,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
if (MI.isCall())
FPInstClass = X86II::SpecialFP;
+ if (MI.isReturn())
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index be8275c92e11ae..c7ef11aede886a 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -670,9 +670,7 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
}
ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
- // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
- // tests at the moment, which is not what we expected.
- static const MCPhysReg RCRegs[] = {X86::MXCSR};
+ static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
return RCRegs;
}
diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp
index 10f141abe9c786..9fa43a802d3907 100644
--- a/llvm/lib/Target/X86/X86InsertWait.cpp
+++ b/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -106,16 +106,20 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
// Jump non X87 instruction.
if (!X86::isX87Instruction(*MI))
continue;
+
// If the instruction instruction neither has float exception nor is
// a load/store instruction, or the instruction is x87 control
// instruction, do not insert wait.
if (!(MI->mayRaiseFPException() || MI->mayLoadOrStore()) ||
isX87ControlInstruction(*MI))
continue;
+
// If the following instruction is an X87 instruction and isn't an X87
// non-waiting control instruction, we can omit insert wait instruction.
MachineBasicBlock::iterator AfterMI = std::next(MI);
if (AfterMI != MBB.end() && !AfterMI->isCall() &&
+ !AfterMI->isTerminator() && !AfterMI->isReturn() &&
+ !AfterMI->hasUnmodeledSideEffects() &&
X86::isX87Instruction(*AfterMI) &&
!isX87NonWaitingControlInstruction(*AfterMI))
continue;
diff --git a/llvm/test/CodeGen/X86/pr59305.ll b/llvm/test/CodeGen/X86/pr59305.ll
index c2f6d21a41d4dc..4172aa6204def2 100644
--- a/llvm/test/CodeGen/X86/pr59305.ll
+++ b/llvm/test/CodeGen/X86/pr59305.ll
@@ -1,32 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X86-64
+; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86
define double @foo(double %0) #0 {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: subq $24, %rsp
-; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
-; CHECK-NEXT: movl $1024, %edi # imm = 0x400
-; CHECK-NEXT: callq fesetround at PLT
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
-; CHECK-NEXT: divsd (%rsp), %xmm1 # 8-byte Folded Reload
-; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movl $1024, %edi # imm = 0x400
-; CHECK-NEXT: callq fesetround at PLT
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; CHECK-NEXT: divsd (%rsp), %xmm0 # 8-byte Folded Reload
-; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: movl $1024, %edi # imm = 0x400
-; CHECK-NEXT: callq fesetround at PLT
-; CHECK-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
-; CHECK-NEXT: divsd (%rsp), %xmm2 # 8-byte Folded Reload
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero
-; CHECK-NEXT: callq fma at PLT
-; CHECK-NEXT: addq $24, %rsp
-; CHECK-NEXT: retq
+; X86-64-LABEL: foo:
+; X86-64: # %bb.0:
+; X86-64-NEXT: subq $24, %rsp
+; X86-64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; X86-64-NEXT: movl $1024, %edi # imm = 0x400
+; X86-64-NEXT: callq fesetround at PLT
+; X86-64-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; X86-64-NEXT: divsd (%rsp), %xmm1 # 8-byte Folded Reload
+; X86-64-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X86-64-NEXT: movl $1024, %edi # imm = 0x400
+; X86-64-NEXT: callq fesetround at PLT
+; X86-64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; X86-64-NEXT: divsd (%rsp), %xmm0 # 8-byte Folded Reload
+; X86-64-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X86-64-NEXT: movl $1024, %edi # imm = 0x400
+; X86-64-NEXT: callq fesetround at PLT
+; X86-64-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
+; X86-64-NEXT: divsd (%rsp), %xmm2 # 8-byte Folded Reload
+; X86-64-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; X86-64-NEXT: # xmm0 = mem[0],zero
+; X86-64-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; X86-64-NEXT: # xmm1 = mem[0],zero
+; X86-64-NEXT: callq fma at PLT
+; X86-64-NEXT: addq $24, %rsp
+; X86-64-NEXT: retq
+;
+; X86-LABEL: foo:
+; X86: # %bb.0:
+; X86-NEXT: subl $60, %esp
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT: wait
+; X86-NEXT: movl $1024, (%esp) # imm = 0x400
+; X86-NEXT: calll fesetround at PLT
+; X86-NEXT: fld1
+; X86-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fdivrp %st, %st(1)
+; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT: wait
+; X86-NEXT: movl $1024, (%esp) # imm = 0x400
+; X86-NEXT: calll fesetround at PLT
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fdivp %st, %st(1)
+; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT: wait
+; X86-NEXT: movl $1024, (%esp) # imm = 0x400
+; X86-NEXT: calll fesetround at PLT
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fdivp %st, %st(1)
+; X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fstpl {{[0-9]+}}(%esp)
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll fma
+; X86-NEXT: addl $60, %esp
+; X86-NEXT: retl
%2 = call i32 @fesetround(i32 noundef 1024)
%3 = call double @llvm.experimental.constrained.fdiv.f64(double 1.000000e+00, double %0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
%4 = call i32 @fesetround(i32 noundef 1024)
More information about the llvm-commits
mailing list