[llvm] r225926 - R600/SI: Fix bad code with unaligned byte vector loads
Matt Arsenault
Matthew.Arsenault at amd.com
Tue Jan 13 17:35:23 PST 2015
Author: arsenm
Date: Tue Jan 13 19:35:22 2015
New Revision: 225926
URL: http://llvm.org/viewvc/llvm-project?rev=225926&view=rev
Log:
R600/SI: Fix bad code with unaligned byte vector loads
Don't do the v4i8 -> v4f32 combine if the load will need to
be expanded due to alignment. This stops adding instructions
to repack into a single register that the v_cvt_ubyteN_f32
instructions read.
Modified:
llvm/trunk/lib/Target/R600/SIISelLowering.cpp
llvm/trunk/lib/Target/R600/SIISelLowering.h
llvm/trunk/test/CodeGen/R600/cvt_f32_ubyte.ll
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=225926&r1=225925&r2=225926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Tue Jan 13 19:35:22 2015
@@ -302,7 +302,7 @@ bool SITargetLowering::isLegalAddressing
return true;
}
-bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
unsigned Align,
bool *IsFast) const {
@@ -1167,7 +1167,7 @@ SDValue SITargetLowering::LowerTrig(SDVa
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
- DAGCombinerInfo &DCI) {
+ DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
EVT ScalarVT = VT.getScalarType();
if (ScalarVT != MVT::f32)
@@ -1215,8 +1215,21 @@ SDValue SITargetLowering::performUCharTo
EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
-
LoadSDNode *Load = cast<LoadSDNode>(Src);
+
+ unsigned AS = Load->getAddressSpace();
+ unsigned Align = Load->getAlignment();
+ Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't try to replace the load if we have to expand it due to alignment
+ // problems. Otherwise we will end up scalarizing the load, and trying to
+ // repack into the vector for no real reason.
+ if (Align < ABIAlignment &&
+ !allowsMisalignedMemoryAccesses(LoadVT, AS, Align, nullptr)) {
+ return SDValue();
+ }
+
SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
Load->getChain(),
Load->getBasePtr(),
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.h?rev=225926&r1=225925&r2=225926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.h Tue Jan 13 19:35:22 2015
@@ -50,8 +50,8 @@ class SITargetLowering : public AMDGPUTa
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
- static SDValue performUCharToFloatCombine(SDNode *N,
- DAGCombinerInfo &DCI);
+ SDValue performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const;
SDValue performSHLPtrCombine(SDNode *N,
unsigned AS,
DAGCombinerInfo &DCI) const;
Modified: llvm/trunk/test/CodeGen/R600/cvt_f32_ubyte.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/cvt_f32_ubyte.ll?rev=225926&r1=225925&r2=225926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/cvt_f32_ubyte.ll (original)
+++ llvm/trunk/test/CodeGen/R600/cvt_f32_ubyte.ll Tue Jan 13 19:35:22 2015
@@ -36,7 +36,7 @@ define void @load_v2i8_to_v2f32(<2 x flo
; SI-DAG: v_cvt_f32_ubyte0_e32
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <3 x i8> addrspace(1)* %in, align 1
+ %load = load <3 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
ret void
@@ -66,23 +66,13 @@ define void @load_v4i8_to_v4f32(<4 x flo
; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
+; SI-NOT: v_lshlrev_b32
+; SI-NOT: v_or_b32
-; SI: v_lshlrev_b32
-; SI: v_or_b32
-; SI: v_lshlrev_b32
-; SI: v_or_b32
-; SI: v_lshlrev_b32
-; SI: v_or_b32
-
-; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG0]]
-; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
-; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
-; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG3]]
-
-; SI-DAG: v_cvt_f32_ubyte0_e32
-; SI-DAG: v_cvt_f32_ubyte1_e32
-; SI-DAG: v_cvt_f32_ubyte2_e32
-; SI-DAG: v_cvt_f32_ubyte3_e32
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
More information about the llvm-commits
mailing list