[llvm] cf81714 - [X86] Model MXCSR for AVX instructions other than AVX512
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 2 16:54:07 PST 2019
Author: Wang, Pengfei
Date: 2019-12-03T08:53:47+08:00
New Revision: cf81714a7eb367260f9e6ae5f3bb11bb63d39124
URL: https://github.com/llvm/llvm-project/commit/cf81714a7eb367260f9e6ae5f3bb11bb63d39124
DIFF: https://github.com/llvm/llvm-project/commit/cf81714a7eb367260f9e6ae5f3bb11bb63d39124.diff
LOG: [X86] Model MXCSR for AVX instructions other than AVX512
Summary: Model MXCSR for AVX instructions other than AVX512
Reviewers: craig.topper, RKSimon
Subscribers: hiraditya, llvm-commits, LuoYuanke, LiuChen3
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70875
Added:
Modified:
llvm/lib/Target/X86/X86InstrFMA.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/mxcsr-reg-usage.ll
llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 0cca71bdc431..4c84f4f2460d 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -95,7 +95,8 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
-let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
+let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1,
+ Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
@@ -237,7 +238,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
}
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
- hasSideEffects = 0 in
+ hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
@@ -263,7 +264,8 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may be not implemented yet we allow the routines doing the actual commute
// transformation to decide if one or another instruction is commutable or not.
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
@@ -384,6 +386,7 @@ defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR6
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
+let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag, X86FoldableSchedWrite sched> {
@@ -425,7 +428,8 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ValueType VT, X86FoldableSchedWrite sched> {
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0,
+ Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -458,6 +462,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
} // isCodeGenOnly = 1
}
+let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index ed376d4ce96f..b8e80bcd566a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5542,7 +5542,7 @@ let ExeDomain = SSEPackedDouble in {
// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX, NoVLX] in {
- let ExeDomain = SSEPackedSingle in {
+ let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
@@ -5552,7 +5552,7 @@ let Predicates = [HasAVX, NoVLX] in {
VEX, VEX_L, VEX_WIG;
}
- let ExeDomain = SSEPackedDouble in {
+ let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
@@ -5564,9 +5564,9 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseAVX] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
- VEX_4V, VEX_LIG, VEX_WIG;
+ VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
- VEX_4V, VEX_LIG, VEX_WIG;
+ VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
}
let Predicates = [UseAVX] in {
@@ -7326,12 +7326,12 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
}
let Predicates = [HasF16C, NoVLX] in {
- defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
- WriteCvtPS2PHSt>;
+ WriteCvtPS2PHSt>, SIMD_EXC;
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
- WriteCvtPS2PHYSt>, VEX_L;
+ WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;
// Pattern match vcvtph2ps of a scalar i64 load.
def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
diff --git a/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll
index da7653255a8d..d88becc251d8 100644
--- a/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll
+++ b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mattr=+mmx -stop-after finalize-isel -o - %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=+mmx,+fma,+f16c -stop-after finalize-isel -o - %s | FileCheck %s
; This test ensures that the MXCSR is implicitly used by MMX FP instructions.
define x86_mmx @mxcsr_mmx(<4 x float> %a0) {
@@ -15,8 +15,31 @@ define x86_mmx @mxcsr_mmx(<4 x float> %a0) {
ret x86_mmx %5
}
+define half @mxcsr_f16c(float %a) {
+; CHECK: VCVTPS2PH{{.*}}mxcsr
+; CHECK: VCVTPH2PS{{.*}}mxcsr
+ %res = fptrunc float %a to half
+ ret half %res
+}
+
+define <4 x float> @mxcsr_fma_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK: VFMADD{{.*}}mxcsr
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a)
+ ret <4 x float> %res
+}
+
+define <4 x float> @mxcsr_fma_ps(<4 x float> %a, <4 x float> %b) {
+; CHECK: VFMADD{{.*}}mxcsr
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a)
+ ret <4 x float> %res
+}
+
declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>)
declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx)
declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>)
declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>)
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
diff --git a/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s b/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s
index c629351690cc..9044fbb4353d 100644
--- a/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s
+++ b/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s
@@ -6,5 +6,4 @@ CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: VFMADDSS4rm
CHECK: register_initial_values:
-# FIXME: This will be changed to CHECK by the following patch that modeling MXCSR to VFMADDSS.
-CHECK-NOT: MXCSR
+CHECK: MXCSR
More information about the llvm-commits
mailing list