[llvm] r247113 - SelectionDAG: Support Expand of f16 extloads
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 8 18:12:28 PDT 2015
Author: arsenm
Date: Tue Sep 8 20:12:27 2015
New Revision: 247113
URL: http://llvm.org/viewvc/llvm-project?rev=247113&view=rev
Log:
SelectionDAG: Support Expand of f16 extloads
Currently this hits an assert that extload should
always be supported, which assumes integer extloads.
This moves a hack out of SI's argument lowering and
is covered by existing tests.
Modified:
llvm/trunk/include/llvm/CodeGen/ValueTypes.h
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/trunk/lib/IR/ValueTypes.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/half.ll
Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=247113&r1=247112&r2=247113&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Tue Sep 8 20:12:27 2015
@@ -89,6 +89,19 @@ namespace llvm {
return VecTy;
}
+ /// Return the type converted to an equivalently sized integer or vector
+ /// with integer element type. Similar to changeVectorElementTypeToInteger,
+ /// but also handles scalars.
+ EVT changeTypeToInteger() {
+ if (isVector())
+ return changeVectorElementTypeToInteger();
+
+ if (isSimple())
+ return MVT::getIntegerVT(getSizeInBits());
+
+ return changeExtendedTypeToInteger();
+ }
+
/// isSimple - Test if the given EVT is simple (as opposed to being
/// extended).
bool isSimple() const {
@@ -342,6 +355,7 @@ namespace llvm {
// Methods for handling the Extended-type case in functions above.
// These are all out-of-line to prevent users of this header file
// from having a dependency on Type.h.
+ EVT changeExtendedTypeToInteger() const;
EVT changeExtendedVectorElementTypeToInteger() const;
static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=247113&r1=247112&r2=247113&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Tue Sep 8 20:12:27 2015
@@ -1095,7 +1095,8 @@ void SelectionDAGLegalize::LegalizeLoadO
break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+ EVT DestVT = Node->getValueType(0);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
@@ -1114,6 +1115,23 @@ void SelectionDAGLegalize::LegalizeLoadO
Chain = Load.getValue(1);
break;
}
+
+ // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+ // normal undefined upper bits behavior to allow using an in-reg extend
+ // with the illegal FP type, so load as an integer and do the
+ // from-integer conversion.
+ if (SrcVT.getScalarType() == MVT::f16) {
+ EVT ISrcVT = SrcVT.changeTypeToInteger();
+ EVT IDestVT = DestVT.changeTypeToInteger();
+ EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
+ Chain, Ptr, ISrcVT,
+ LD->getMemOperand());
+ Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Chain = Result.getValue(1);
+ break;
+ }
}
assert(!SrcVT.isVector() &&
Modified: llvm/trunk/lib/IR/ValueTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/ValueTypes.cpp?rev=247113&r1=247112&r2=247113&view=diff
==============================================================================
--- llvm/trunk/lib/IR/ValueTypes.cpp (original)
+++ llvm/trunk/lib/IR/ValueTypes.cpp Tue Sep 8 20:12:27 2015
@@ -19,6 +19,11 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+EVT EVT::changeExtendedTypeToInteger() const {
+ LLVMContext &Context = LLVMTy->getContext();
+ return getIntegerVT(Context, getSizeInBits());
+}
+
EVT EVT::changeExtendedVectorElementTypeToInteger() const {
LLVMContext &Context = LLVMTy->getContext();
EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=247113&r1=247112&r2=247113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Sep 8 20:12:27 2015
@@ -464,12 +464,6 @@ bool SITargetLowering::shouldConvertCons
return TII->isInlineConstant(Imm);
}
-static EVT toIntegerVT(EVT VT) {
- if (VT.isVector())
- return VT.changeVectorElementTypeToInteger();
- return MVT::getIntegerVT(VT.getSizeInBits());
-}
-
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
@@ -493,30 +487,10 @@ SDValue SITargetLowering::LowerParameter
unsigned Align = DL.getABITypeAlignment(Ty);
- if (VT != MemVT && VT.isFloatingPoint()) {
- // Do an integer load and convert.
- // FIXME: This is mostly because load legalization after type legalization
- // doesn't handle FP extloads.
- assert(VT.getScalarType() == MVT::f32 &&
- MemVT.getScalarType() == MVT::f16);
-
- EVT IVT = toIntegerVT(VT);
- EVT MemIVT = toIntegerVT(MemVT);
- SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
- IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
- false, // isVolatile
- true, // isNonTemporal
- true, // isInvariant
- Align); // Alignment
- SDValue Ops[] = {
- DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
- Load.getValue(1)
- };
-
- return DAG.getMergeValues(Ops, SL);
- }
-
ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (MemVT.isFloatingPoint())
+ ExtTy = ISD::EXTLOAD;
+
return DAG.getLoad(ISD::UNINDEXED, ExtTy,
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
false, // isVolatile
Modified: llvm/trunk/test/CodeGen/AMDGPU/half.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/half.ll?rev=247113&r1=247112&r2=247113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/half.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/half.ll Tue Sep 8 20:12:27 2015
@@ -112,12 +112,24 @@ define void @extload_v8f16_to_v8f32_arg(
}
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
+; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
+; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
+; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
+; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
%ext = fpext half %arg to double
store double %ext, double addrspace(1)* %out
ret void
}
+
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
%ext = fpext <2 x half> %arg to <2 x double>
store <2 x double> %ext, <2 x double> addrspace(1)* %out
@@ -125,6 +137,16 @@ define void @extload_v2f16_to_v2f64_arg(
}
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
%ext = fpext <3 x half> %arg to <3 x double>
store <3 x double> %ext, <3 x double> addrspace(1)* %out
@@ -132,6 +154,19 @@ define void @extload_v3f16_to_v3f64_arg(
}
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
%ext = fpext <4 x half> %arg to <4 x double>
store <4 x double> %ext, <4 x double> addrspace(1)* %out
@@ -139,6 +174,37 @@ define void @extload_v4f16_to_v4f64_arg(
}
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
%ext = fpext <8 x half> %arg to <8 x double>
store <8 x double> %ext, <8 x double> addrspace(1)* %out
@@ -194,6 +260,12 @@ define void @global_extload_f16_to_f32(f
}
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
+; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%cvt = fpext <2 x half> %val to <2 x float>
@@ -246,6 +318,15 @@ define void @global_extload_f16_to_f64(d
}
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]]
+; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]]
+; GCN-DAG: buffer_store_dwordx2 [[CVT2]]
+; GCN-DAG: buffer_store_dwordx2 [[CVT3]]
+; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%cvt = fpext <2 x half> %val to <2 x double>
More information about the llvm-commits
mailing list