[llvm] r240616 - [X86] Accept hasAVX512() as well as hasFMA() when generating FMA.
Ahmed Bougacha
ahmed.bougacha at gmail.com
Wed Jun 24 17:44:46 PDT 2015
Author: ab
Date: Wed Jun 24 19:44:46 2015
New Revision: 240616
URL: http://llvm.org/viewvc/llvm-project?rev=240616&view=rev
Log:
[X86] Accept hasAVX512() as well as hasFMA() when generating FMA.
hasFMA() is not always true on AVX-512 targets, for example when using
'clang -mavx512f' without an explicit CPU.
Also check for an explicit -mattr=+avx512f instead of a specific CPU
(-mcpu=knl) in a couple of related tests.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512-fma.ll
llvm/trunk/test/CodeGen/X86/fma.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=240616&r1=240615&r2=240616&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jun 24 19:44:46 2015
@@ -1111,7 +1111,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
- if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
+ if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
@@ -18636,7 +18636,7 @@ bool X86TargetLowering::isVectorLoadExtD
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+ if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
return false;
VT = VT.getScalarType();
@@ -19821,6 +19821,7 @@ X86TargetLowering::emitEHSjLjLongJmp(Mac
// Replace 213-type (isel default) FMA3 instructions with 231-type for
// accumulator loops. Writing back to the accumulator allows the coalescer
// to remove extra copies in the loop.
+// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937).
MachineBasicBlock *
X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -24182,7 +24183,8 @@ static SDValue PerformFMACombine(SDNode
EVT ScalarVT = VT.getScalarType();
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
- (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
+ (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
+ !Subtarget->hasAVX512()))
return SDValue();
SDValue A = N->getOperand(0);
Modified: llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll?rev=240616&r1=240615&r2=240616&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-fma-intrinsics.ll Wed Jun 24 19:44:46 2015
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s
declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
Modified: llvm/trunk/test/CodeGen/X86/avx512-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-fma.ll?rev=240616&r1=240615&r2=240616&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-fma.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-fma.ll Wed Jun 24 19:44:46 2015
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
; CHECK-LABEL: test_x86_fmadd_ps_z
; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma.ll?rev=240616&r1=240615&r2=240616&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma.ll Wed Jun 24 19:44:46 2015
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+avx512f,-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
More information about the llvm-commits
mailing list