[llvm] r299112 - [AVX-512] Fix another case where fastisel was generating a GR8 to VK1 copy. This time after calls returning i1.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 30 14:02:52 PDT 2017
Author: ctopper
Date: Thu Mar 30 16:02:52 2017
New Revision: 299112
URL: http://llvm.org/viewvc/llvm-project?rev=299112&view=rev
Log:
[AVX-512] Fix another case where fastisel was generating a GR8 to VK1 copy. This time after calls returning i1.
Fixes PR32472.
Modified:
llvm/trunk/lib/Target/X86/X86FastISel.cpp
Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=299112&r1=299111&r2=299112&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Thu Mar 30 16:02:52 2017
@@ -3525,6 +3525,7 @@ bool X86FastISel::fastLowerCall(CallLowe
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
unsigned CopyReg = ResultReg + i;
+ unsigned SrcReg = VA.getLocReg();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
@@ -3532,9 +3533,18 @@ bool X86FastISel::fastLowerCall(CallLowe
report_fatal_error("SSE register return with SSE disabled");
}
+ // If the return value is an i1 and AVX-512 is enabled, we need to do a fixup to make the copy legal.
+ if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
+ // Need to copy to a GR32 first.
+ // TODO: MOVZX isn't great here. We don't care about the upper bits.
+ SrcReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL);
+ }
+
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
- if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
CopyVT = MVT::f80;
CopyReg = createResultReg(&X86::RFP80RegClass);
@@ -3542,7 +3552,7 @@ bool X86FastISel::fastLowerCall(CallLowe
// Copy out the result.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
InRegs.push_back(VA.getLocReg());
// Round the f80 to the right size, which also moves it to the appropriate
More information about the llvm-commits
mailing list