[PATCH] Add target hook to prevent folding some bitcasted loads.
Matt Arsenault
Matthew.Arsenault at amd.com
Mon Oct 28 19:01:42 PDT 2013
This is to avoid this transformation in some cases:
fold (conv (load x)) -> (load (conv*)x)
On architectures that don't natively support some vector
loads efficiently casting the load to a smaller vector of
larger types and loading is more efficient.
http://llvm-reviews.chandlerc.com/D2046
Files:
include/llvm/Target/TargetLowering.h
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
lib/Target/R600/AMDGPUISelLowering.cpp
lib/Target/R600/AMDGPUISelLowering.h
test/CodeGen/R600/combine_vloads.ll
Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -203,6 +203,17 @@
return PredictableSelectIsExpensive;
}
+ /// isLoadBitCastBeneficial() - Return true if the following transform
+ /// is beneficial.
+ /// fold (conv (load x)) -> (load (conv*)x)
+ /// On architectures that don't natively support some vector loads efficiently,
+ /// casting the load to a smaller vector of larger types and loading
+ /// is more efficient, however, this can be undone by optimizations in
+ /// dag combiner.
+ virtual bool isLoadBitCastBeneficial(EVT /* Load */, EVT /* Bitcast */) const {
+ return true;
+ }
+
/// Return the ValueType of the result of SETCC operations. Also used to
/// obtain the target's preferred type for the condition operand of SELECT and
/// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5727,7 +5727,8 @@
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
Index: lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.cpp
+++ lib/Target/R600/AMDGPUISelLowering.cpp
@@ -192,6 +192,18 @@
return MVT::i32;
}
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
+ EVT CastTy) const {
+ if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
+ return true;
+
+ unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
+ unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
+
+ return ((LScalarSize <= CastScalarSize) ||
+ (CastScalarSize >= 32) ||
+ (LScalarSize < 32));
+}
//===---------------------------------------------------------------------===//
// Target Properties
Index: lib/Target/R600/AMDGPUISelLowering.h
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.h
+++ lib/Target/R600/AMDGPUISelLowering.h
@@ -75,6 +75,7 @@
virtual bool isFAbsFree(EVT VT) const;
virtual bool isFNegFree(EVT VT) const;
virtual MVT getVectorIdxTy() const;
+ virtual bool isLoadBitCastBeneficial(EVT, EVT) const LLVM_OVERRIDE;
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
Index: test/CodeGen/R600/combine_vloads.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/combine_vloads.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=Cypress < %s | FileCheck -check-prefix=EG %s
+
+;
+; kernel void combine_vloads(global char8* src, global char8* result) {
+; for (int i = 0; i < 1024; ++i)
+; result[i] = src[0] + src[1] + src[2] + src[3];
+; }
+;
+
+
+; 128-bit loads instead of many 8-bit
+; EG-LABEL: @combine_vloads:
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
+entry:
+ br label %for.body
+
+for.exit: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
+ %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
+ %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
+ %vecload2 = load <8 x i32> addrspace(1)* %0, align 32
+ %1 = bitcast <8 x i32> %vecload2 to <32 x i8>
+ %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp9 = add nsw <8 x i8> %tmp5, %tmp8
+ %tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ %tmp13 = add nsw <8 x i8> %tmp9, %tmp12
+ %tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %tmp17 = add nsw <8 x i8> %tmp13, %tmp16
+ %scevgep = getelementptr <8 x i8> addrspace(1)* %result, i32 %i.01
+ %2 = bitcast <8 x i8> %tmp17 to <2 x i32>
+ %3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)*
+ store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8
+ %tmp19 = add nsw i32 %i.01, 1
+ %exitcond = icmp eq i32 %tmp19, 1024
+ br i1 %exitcond, label %for.exit, label %for.body
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2046.1.patch
Type: text/x-patch
Size: 5368 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131028/7b4e824d/attachment.bin>
More information about the llvm-commits
mailing list