[llvm] r213186 - [NVPTX] Honor alignment on vector loads/stores
Justin Holewinski
jholewinski at nvidia.com
Wed Jul 16 12:45:37 PDT 2014
Author: jholewinski
Date: Wed Jul 16 14:45:35 2014
New Revision: 213186
URL: http://llvm.org/viewvc/llvm-project?rev=213186&view=rev
Log:
[NVPTX] Honor alignment on vector loads/stores
We were not considering the stated alignment on vector loads/stores,
leading us to generate vector instructions even when we do not have
sufficient alignment.
Now, for IR like:
%1 = load <4 x float>, <4 x float>* %ptr, align 4
we will generate correct, conservative PTX like:
ld.f32 ... [%ptr]
ld.f32 ... [%ptr+4]
ld.f32 ... [%ptr+8]
ld.f32 ... [%ptr+12]
Or if we have an alignment of 8 (for example), we can
generate code like:
ld.v2.f32 ... [%ptr]
ld.v2.f32 ... [%ptr+8]
Added:
llvm/trunk/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
Modified:
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=213186&r1=213185&r2=213186&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Wed Jul 16 14:45:35 2014
@@ -1494,6 +1494,21 @@ NVPTXTargetLowering::LowerSTOREVector(SD
break;
}
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ const DataLayout *TD = getDataLayout();
+
+ unsigned Align = MemSD->getAlignment();
+ unsigned PrefAlign =
+ TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
+ if (Align < PrefAlign) {
+ // This store is not sufficiently aligned, so bail out and let this vector
+ // store be scalarized. Note that we may still be able to emit smaller
+ // vector stores. For example, if we are storing a <4 x float> with an
+ // alignment of 8, this check will fail but the legalizer will try again
+ // with 2 x <2 x float>, which will succeed with an alignment of 8.
+ return SDValue();
+ }
+
unsigned Opcode = 0;
EVT EltVT = ValVT.getVectorElementType();
unsigned NumElts = ValVT.getVectorNumElements();
@@ -1536,8 +1551,6 @@ NVPTXTargetLowering::LowerSTOREVector(SD
Ops.push_back(N->getOperand(i));
}
- MemSDNode *MemSD = cast<MemSDNode>(N);
-
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
MemSD->getMemoryVT(), MemSD->getMemOperand());
@@ -3046,6 +3059,7 @@ SDValue NVPTXTargetLowering::PerformDAGC
/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ const DataLayout *TD,
SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
SDLoc DL(N);
@@ -3073,6 +3087,20 @@ static void ReplaceLoadVector(SDNode *N,
break;
}
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ unsigned Align = LD->getAlignment();
+ unsigned PrefAlign =
+ TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
+ if (Align < PrefAlign) {
+ // This load is not sufficiently aligned, so bail out and let this vector
+ // load be scalarized. Note that we may still be able to emit smaller
+ // vector loads. For example, if we are loading a <4 x float> with an
+ // alignment of 8, this check will fail but the legalizer will try again
+ // with 2 x <2 x float>, which will succeed with an alignment of 8.
+ return;
+ }
+
EVT EltVT = ResVT.getVectorElementType();
unsigned NumElts = ResVT.getVectorNumElements();
@@ -3109,8 +3137,6 @@ static void ReplaceLoadVector(SDNode *N,
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
OtherOps.push_back(N->getOperand(i));
- LoadSDNode *LD = cast<LoadSDNode>(N);
-
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
@@ -3283,7 +3309,7 @@ void NVPTXTargetLowering::ReplaceNodeRes
default:
report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
- ReplaceLoadVector(N, DAG, Results);
+ ReplaceLoadVector(N, DAG, getDataLayout(), Results);
return;
case ISD::INTRINSIC_W_CHAIN:
ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
Added: llvm/trunk/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/misaligned-vector-ldst.ll?rev=213186&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/misaligned-vector-ldst.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/misaligned-vector-ldst.ll Wed Jul 16 14:45:35 2014
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-LABEL: t1
+define <4 x float> @t1(i8* %p1) {
+; CHECK-NOT: ld.v4
+; CHECK-NOT: ld.v2
+; CHECK-NOT: ld.f32
+; CHECK: ld.u8
+ %cast = bitcast i8* %p1 to <4 x float>*
+ %r = load <4 x float>* %cast, align 1
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: t2
+define <4 x float> @t2(i8* %p1) {
+; CHECK-NOT: ld.v4
+; CHECK-NOT: ld.v2
+; CHECK: ld.f32
+ %cast = bitcast i8* %p1 to <4 x float>*
+ %r = load <4 x float>* %cast, align 4
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: t3
+define <4 x float> @t3(i8* %p1) {
+; CHECK-NOT: ld.v4
+; CHECK: ld.v2
+ %cast = bitcast i8* %p1 to <4 x float>*
+ %r = load <4 x float>* %cast, align 8
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: t4
+define <4 x float> @t4(i8* %p1) {
+; CHECK: ld.v4
+ %cast = bitcast i8* %p1 to <4 x float>*
+ %r = load <4 x float>* %cast, align 16
+ ret <4 x float> %r
+}
+
+
+; CHECK-LABEL: s1
+define void @s1(<4 x float>* %p1, <4 x float> %v) {
+; CHECK-NOT: st.v4
+; CHECK-NOT: st.v2
+; CHECK-NOT: st.f32
+; CHECK: st.u8
+ store <4 x float> %v, <4 x float>* %p1, align 1
+ ret void
+}
+
+; CHECK-LABEL: s2
+define void @s2(<4 x float>* %p1, <4 x float> %v) {
+; CHECK-NOT: st.v4
+; CHECK-NOT: st.v2
+; CHECK: st.f32
+ store <4 x float> %v, <4 x float>* %p1, align 4
+ ret void
+}
+
+; CHECK-LABEL: s3
+define void @s3(<4 x float>* %p1, <4 x float> %v) {
+; CHECK-NOT: st.v4
+ store <4 x float> %v, <4 x float>* %p1, align 8
+ ret void
+}
+
+; CHECK-LABEL: s4
+define void @s4(<4 x float>* %p1, <4 x float> %v) {
+; CHECK: st.v4
+ store <4 x float> %v, <4 x float>* %p1, align 16
+ ret void
+}
+
More information about the llvm-commits
mailing list