[llvm] dbcfbff - [PowerPC] Add intrinsic to read or set FPSCR register
Qiu Chaofan via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 10 03:28:15 PDT 2020
Author: Qiu Chaofan
Date: 2020-08-10T18:27:45+08:00
New Revision: dbcfbffc7ae46cc7b84257787681676144a1bd5f
URL: https://github.com/llvm/llvm-project/commit/dbcfbffc7ae46cc7b84257787681676144a1bd5f
DIFF: https://github.com/llvm/llvm-project/commit/dbcfbffc7ae46cc7b84257787681676144a1bd5f.diff
LOG: [PowerPC] Add intrinsic to read or set FPSCR register
This patch introduces two intrinsics: llvm.ppc.setflm and
llvm.ppc.readflm. They read from or write to the FPSCR (floating-point
status and control register), which contains the rounding mode and
exception status.
To ensure program correctness, we need to prevent FP operations from
being moved across these intrinsics (the mffs/mtfsf instructions), so here I
set them as scheduling boundaries. We can relax this restriction if
FPSCR is modeled well in the future.
Reviewed By: steven.zhang
Differential Revision: https://reviews.llvm.org/D84914
Added:
llvm/test/CodeGen/PowerPC/read-set-flm.ll
Modified:
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 8dc67b0d1a4f..ae25bb400e46 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -47,6 +47,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// eieio instruction
def int_ppc_eieio : Intrinsic<[],[],[]>;
+ // Get content from current FPSCR register
+ def int_ppc_readflm : Intrinsic<[llvm_double_ty], [], [IntrNoMem]>;
+ // Set FPSCR register, and return previous content
+ def int_ppc_setflm : Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+
// Intrinsics for [double]word extended forms of divide instructions
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8a14a271df30..e56670e27ccd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12135,6 +12135,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(NewFPSCRReg)
.addImm(0)
.addImm(0);
+ } else if (MI.getOpcode() == PPC::SETFLM) {
+ DebugLoc Dl = MI.getDebugLoc();
+
+ // Result of setflm is previous FPSCR content, so we need to save it first.
+ Register OldFPSCRReg = MI.getOperand(0).getReg();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+ // Put bits in 32:63 to FPSCR.
+ Register NewFPSCRReg = MI.getOperand(1).getReg();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
+ .addImm(255)
+ .addReg(NewFPSCRReg)
+ .addImm(0)
+ .addImm(0);
} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
return emitProbedAlloca(MI, BB);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 888a77464f34..e119a3246f31 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1644,6 +1644,17 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
return false;
}
+bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
+ // across them, since some FP operations may change content of FPSCR.
+ // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
+ if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
+ return true;
+ return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
unsigned OpC = MI.getOpcode();
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index bdcfa76505da..d230db286ee7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -463,6 +463,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index ac91f26b4e03..fa6f911f83ad 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1514,6 +1514,9 @@ def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
"#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+
+def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM),
+ "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>;
}
let Defs = [LR] in
@@ -3269,7 +3272,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
-
+def : Pat<(int_ppc_readflm), (MFFS)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 028904fc3200..4c72065e1912 100644
--- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -87,23 +87,23 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: bl __gcc_qsub
; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsb1 31
-; CHECK-NEXT: lis 3, .LCPI0_1@ha
; CHECK-NEXT: mtfsb0 30
; CHECK-NEXT: fadd 1, 2, 1
; CHECK-NEXT: mtfsf 1, 0
; CHECK-NEXT: fctiwz 0, 1
-; CHECK-NEXT: mffs 1
; CHECK-NEXT: stfd 0, 160(1)
+; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsb1 31
+; CHECK-NEXT: lis 3, .LCPI0_1@ha
; CHECK-NEXT: mtfsb0 30
-; CHECK-NEXT: fadd 0, 28, 29
-; CHECK-NEXT: mtfsf 1, 1
-; CHECK-NEXT: lfs 1, .LCPI0_1@l(3)
-; CHECK-NEXT: fctiwz 0, 0
-; CHECK-NEXT: stfd 0, 152(1)
+; CHECK-NEXT: fadd 1, 28, 29
+; CHECK-NEXT: mtfsf 1, 0
+; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
+; CHECK-NEXT: fctiwz 1, 1
+; CHECK-NEXT: stfd 1, 152(1)
; CHECK-NEXT: fcmpu 0, 28, 27
; CHECK-NEXT: lwz 3, 164(1)
-; CHECK-NEXT: fcmpu 1, 29, 1
+; CHECK-NEXT: fcmpu 1, 29, 0
; CHECK-NEXT: lwz 4, 156(1)
; CHECK-NEXT: crandc 20, 6, 0
; CHECK-NEXT: cror 20, 5, 20
@@ -209,25 +209,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: bl __gcc_qsub
; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsb1 31
-; CHECK-NEXT: lis 3, .LCPI0_2@ha
; CHECK-NEXT: mtfsb0 30
; CHECK-NEXT: fadd 1, 2, 1
; CHECK-NEXT: mtfsf 1, 0
; CHECK-NEXT: fctiwz 0, 1
-; CHECK-NEXT: mffs 1
; CHECK-NEXT: stfd 0, 32(1)
+; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsb1 31
-; CHECK-NEXT: lfs 0, .LCPI0_2@l(3)
-; CHECK-NEXT: lis 3, .LCPI0_3@ha
+; CHECK-NEXT: lis 3, .LCPI0_2@ha
+; CHECK-NEXT: lfs 2, .LCPI0_2@l(3)
; CHECK-NEXT: mtfsb0 30
-; CHECK-NEXT: fadd 2, 28, 29
-; CHECK-NEXT: mtfsf 1, 1
-; CHECK-NEXT: lfs 1, .LCPI0_3@l(3)
-; CHECK-NEXT: fctiwz 2, 2
-; CHECK-NEXT: stfd 2, 24(1)
-; CHECK-NEXT: fcmpu 0, 30, 0
+; CHECK-NEXT: lis 3, .LCPI0_3@ha
+; CHECK-NEXT: fadd 1, 28, 29
+; CHECK-NEXT: mtfsf 1, 0
+; CHECK-NEXT: lfs 0, .LCPI0_3@l(3)
+; CHECK-NEXT: fctiwz 1, 1
+; CHECK-NEXT: stfd 1, 24(1)
+; CHECK-NEXT: fcmpu 0, 30, 2
; CHECK-NEXT: lwz 3, 36(1)
-; CHECK-NEXT: fcmpu 1, 31, 1
+; CHECK-NEXT: fcmpu 1, 31, 0
; CHECK-NEXT: lwz 4, 28(1)
; CHECK-NEXT: crandc 20, 6, 1
; CHECK-NEXT: cror 20, 4, 20
@@ -264,25 +264,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: bl __gcc_qsub
; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsb1 31
-; CHECK-NEXT: lis 3, .LCPI0_0@ha
; CHECK-NEXT: mtfsb0 30
; CHECK-NEXT: fadd 1, 2, 1
; CHECK-NEXT: mtfsf 1, 0
; CHECK-NEXT: fctiwz 0, 1
-; CHECK-NEXT: mffs 1
; CHECK-NEXT: stfd 0, 96(1)
+; CHECK-NEXT: mffs 0
; CHECK-NEXT: mtfsb1 31
-; CHECK-NEXT: lfs 0, .LCPI0_0@l(3)
-; CHECK-NEXT: lis 3, .LCPI0_1@ha
+; CHECK-NEXT: lis 3, .LCPI0_0@ha
+; CHECK-NEXT: lfs 2, .LCPI0_0@l(3)
; CHECK-NEXT: mtfsb0 30
-; CHECK-NEXT: fadd 2, 30, 31
-; CHECK-NEXT: mtfsf 1, 1
-; CHECK-NEXT: lfs 1, .LCPI0_1@l(3)
-; CHECK-NEXT: fctiwz 2, 2
-; CHECK-NEXT: stfd 2, 88(1)
-; CHECK-NEXT: fcmpu 0, 30, 0
+; CHECK-NEXT: lis 3, .LCPI0_1@ha
+; CHECK-NEXT: fadd 1, 30, 31
+; CHECK-NEXT: mtfsf 1, 0
+; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
+; CHECK-NEXT: fctiwz 1, 1
+; CHECK-NEXT: stfd 1, 88(1)
+; CHECK-NEXT: fcmpu 0, 30, 2
; CHECK-NEXT: lwz 3, 100(1)
-; CHECK-NEXT: fcmpu 1, 31, 1
+; CHECK-NEXT: fcmpu 1, 31, 0
; CHECK-NEXT: lwz 4, 92(1)
; CHECK-NEXT: crandc 20, 6, 0
; CHECK-NEXT: cror 20, 5, 20
diff --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
new file mode 100644
index 000000000000..15f69022226f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux | FileCheck %s
+; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \
+; RUN: 2>&1 | FileCheck %s --check-prefix=LOG
+
+define double @in_nostrict(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: in_nostrict:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: xsdivdp 1, 1, 2
+; CHECK-NEXT: xsadddp 1, 1, 3
+; CHECK-NEXT: xsadddp 0, 1, 0
+; CHECK-NEXT: mtfsf 255, 4
+; CHECK-NEXT: xsdivdp 1, 3, 4
+; CHECK-NEXT: xsadddp 1, 1, 2
+; CHECK-NEXT: xsadddp 1, 0, 1
+; CHECK-NEXT: blr
+;
+; LOG: *** MI Scheduling ***
+; LOG-NEXT: in_nostrict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, %{{[0-9]+}}:f8rc, 0, 0
+; LOG: *** MI Scheduling ***
+; LOG-NEXT: in_nostrict:%bb.0 entry
+; LOG: ExitSU: %{{[0-9]+}}:f8rc = MFFS implicit $rm
+;
+; LOG: *** MI Scheduling ***
+; LOG-NEXT: in_nostrict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, renamable $f{{[0-9]+}}, 0, 0
+entry:
+ %0 = tail call double @llvm.ppc.readflm()
+ %1 = fdiv double %a, %b
+ %2 = fadd double %1, %c
+ %3 = fadd double %2, %0
+ call double @llvm.ppc.setflm(double %d)
+ %5 = fdiv double %c, %d
+ %6 = fadd double %5, %b
+ %7 = fadd double %3, %6
+ ret double %7
+}
+
+define double @in_strict(double %a, double %b, double %c, double %d) #0 {
+; CHECK-LABEL: in_strict:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: xsdivdp 1, 1, 2
+; CHECK-NEXT: xsadddp 1, 1, 3
+; CHECK-NEXT: xsadddp 0, 1, 0
+; CHECK-NEXT: mtfsf 255, 4
+; CHECK-NEXT: xsdivdp 1, 3, 4
+; CHECK-NEXT: xsadddp 1, 1, 2
+; CHECK-NEXT: xsadddp 1, 0, 1
+; CHECK-NEXT: blr
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_strict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, %{{[0-9]+}}:f8rc, 0, 0
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_strict:%bb.0 entry
+; LOG: ExitSU: %{{[0-9]+}}:f8rc = MFFS implicit $rm
+;
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: in_strict:%bb.0 entry
+; LOG: ExitSU: MTFSF 255, renamable $f{{[0-9]+}}, 0, 0
+entry:
+ %0 = tail call double @llvm.ppc.readflm()
+ %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ %2 = call double @llvm.experimental.constrained.fadd.f64(double %1, double %c, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ %3 = call double @llvm.experimental.constrained.fadd.f64(double %2, double %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ call double @llvm.ppc.setflm(double %d)
+ %5 = call double @llvm.experimental.constrained.fdiv.f64(double %c, double %d, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ %6 = call double @llvm.experimental.constrained.fadd.f64(double %5, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ %7 = call double @llvm.experimental.constrained.fadd.f64(double %3, double %6, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret double %7
+}
+
+declare double @llvm.ppc.readflm()
+declare double @llvm.ppc.setflm(double)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+
+attributes #0 = { strictfp }
More information about the llvm-commits
mailing list