[llvm] fc0c72b - [X86] Add AVX512 support to the fast isel code for Intrinsic::convert_from_fp16/convert_to_fp16.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 14 10:57:29 PST 2020
Author: Craig Topper
Date: 2020-02-14T10:57:11-08:00
New Revision: fc0c72b2dfc1f6209f9d5a401730219727e166cc
URL: https://github.com/llvm/llvm-project/commit/fc0c72b2dfc1f6209f9d5a401730219727e166cc
DIFF: https://github.com/llvm/llvm-project/commit/fc0c72b2dfc1f6209f9d5a401730219727e166cc.diff
LOG: [X86] Add AVX512 support to the fast isel code for Intrinsic::convert_from_fp16/convert_to_fp16.
Added:
Modified:
llvm/lib/Target/X86/X86FastISel.cpp
llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index a135f100ba04..ccfa01677894 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2632,12 +2632,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// used to provide rounding control: use MXCSR.RC, encoded as 0b100.
// It's consistent with the other FP instructions, which are usually
// controlled by MXCSR.
- InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
+ unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
+ : X86::VCVTPS2PHrr;
+ InputReg = fastEmitInst_ri(Opc, RC, InputReg, false, 4);
// Move the lower 32-bits of ResultReg to another register of class GR32.
+ Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
+ : X86::VMOVPDI2DIrr;
ResultReg = createResultReg(&X86::GR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::VMOVPDI2DIrr), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(InputReg, RegState::Kill);
// The result value is in the lower 16-bits of ResultReg.
@@ -2653,11 +2656,13 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
InputReg, /*Kill=*/true);
- InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
+ unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
+ : X86::VCVTPH2PSrr;
+ InputReg = fastEmitInst_r(Opc, RC, InputReg, /*Kill=*/true);
// The result value is in the lower 32-bits of ResultReg.
// Emit an explicit copy from register class VR128 to register class FR32.
- ResultReg = createResultReg(&X86::FR32RegClass);
+ ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(InputReg, RegState::Kill);
diff --git a/llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll b/llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll
index acb85fd171f5..db892fc8ee98 100644
--- a/llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-float-half-convertion.ll
@@ -1,4 +1,5 @@
; RUN: llc -fast-isel -fast-isel-abort=1 -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+avx512vl < %s | FileCheck %s
; Verify that fast-isel correctly expands float-half conversions.
More information about the llvm-commits mailing list