[llvm] 45d9280 - [PowerPC] use inst-level fast-math-flags to drive MachineCombiner
Chen Zheng via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 28 00:33:35 PDT 2020
Author: Chen Zheng
Date: 2020-04-28T03:31:12-04:00
New Revision: 45d92806eaf8bd1d0edb94e983ac540f1633a2a6
URL: https://github.com/llvm/llvm-project/commit/45d92806eaf8bd1d0edb94e983ac540f1633a2a6
DIFF: https://github.com/llvm/llvm-project/commit/45d92806eaf8bd1d0edb94e983ac540f1633a2a6.diff
LOG: [PowerPC] use inst-level fast-math-flags to drive MachineCombiner
Currently, the PowerPC target uses the function-scope UnsafeFPMath
option to drive the Machine Combiner pass.
This is not accurate in two ways:
1: The scope is not accurate. The Machine Combiner pass only requires
instruction-level flags, not a function-scope option.
2: The floating-point flag is not accurate. The Machine Combiner pass
only requires the floating-point flags reassoc and nsz.
Reviewed By: steven.zhang
Differential Revision: https://reviews.llvm.org/D78183
Added:
Modified:
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/test/CodeGen/PowerPC/machine-combiner.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 709b21384538..eb5e2ffe243f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -225,6 +225,26 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+/// This is an architecture-specific helper function of reassociateOps.
+/// Set special operand attributes for new instructions after reassociation.
+void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+ MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const {
+ // Propagate FP flags from the original instructions.
+ // But clear poison-generating flags because those may not be valid now.
+ uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ NewMI1.setFlags(IntersectedFlags);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+ NewMI2.setFlags(IntersectedFlags);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+}
+
// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
@@ -258,7 +278,8 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case PPC::QVFMUL:
case PPC::QVFMULS:
case PPC::QVFMULSs:
- return true;
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
default:
return false;
}
@@ -272,10 +293,6 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
return false;
- // FP reassociation is only legal when we don't need strict IEEE semantics.
- if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
- return false;
-
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 5459fa3ee216..d8c8388e4554 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -248,6 +248,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+ void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const override;
+
bool isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const override;
diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
index 47df8921fb6f..cc8a2ec5de72 100644
--- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll
+++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
-; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
+; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
+; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -14,9 +14,9 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK: fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd float %x0, %x1
- %t1 = fadd float %t0, %x2
- %t2 = fadd float %t1, %x3
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %t1, %x3
ret float %t2
}
@@ -28,9 +28,9 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK: fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd float %x0, %x1
- %t1 = fadd float %x2, %t0
- %t2 = fadd float %t1, %x3
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %t1, %x3
ret float %t2
}
@@ -42,9 +42,9 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK: fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd float %x0, %x1
- %t1 = fadd float %t0, %x2
- %t2 = fadd float %x3, %t1
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %x3, %t1
ret float %t2
}
@@ -56,9 +56,9 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK: fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd float %x0, %x1
- %t1 = fadd float %x2, %t0
- %t2 = fadd float %x3, %t1
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %x3, %t1
ret float %t2
}
@@ -77,13 +77,13 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
; CHECK: fadds 1, [[REG2]], 8
; CHECK-NEXT: blr
- %t0 = fadd float %x0, %x1
- %t1 = fadd float %t0, %x2
- %t2 = fadd float %t1, %x3
- %t3 = fadd float %t2, %x4
- %t4 = fadd float %t3, %x5
- %t5 = fadd float %t4, %x6
- %t6 = fadd float %t5, %x7
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %t1, %x3
+ %t3 = fadd reassoc nsz float %t2, %x4
+ %t4 = fadd reassoc nsz float %t3, %x5
+ %t5 = fadd reassoc nsz float %t4, %x6
+ %t6 = fadd reassoc nsz float %t5, %x7
ret float %t6
}
@@ -100,9 +100,9 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd <4 x float> %x0, %x1
- %t1 = fadd <4 x float> %t0, %x2
- %t2 = fadd <4 x float> %t1, %x3
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+ %t2 = fadd reassoc nsz <4 x float> %t1, %x3
ret <4 x float> %t2
}
@@ -117,9 +117,9 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd <4 x float> %x0, %x1
- %t1 = fadd <4 x float> %x2, %t0
- %t2 = fadd <4 x float> %t1, %x3
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x float> %t1, %x3
ret <4 x float> %t2
}
@@ -134,9 +134,9 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd <4 x float> %x0, %x1
- %t1 = fadd <4 x float> %t0, %x2
- %t2 = fadd <4 x float> %x3, %t1
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+ %t2 = fadd reassoc nsz <4 x float> %x3, %t1
ret <4 x float> %t2
}
@@ -151,9 +151,9 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
- %t0 = fadd <4 x float> %x0, %x1
- %t1 = fadd <4 x float> %x2, %t0
- %t2 = fadd <4 x float> %x3, %t1
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x float> %x3, %t1
ret <4 x float> %t2
}
More information about the llvm-commits
mailing list