[PATCH] R600/SI: Move i64 -> v2i32 load promotion into AMDGPUDAGToDAGISel::Select()
Tom Stellard
thomas.stellard at amd.com
Fri Jan 23 09:01:28 PST 2015
We used to do this promotion during DAG legalization, but this
caused an infinite loop in ExpandUnalignedLoad() because it assumed
that i64 loads were legal if i64 was a legal type.
It also seems better to report i64 loads as legal, since they actually
are and we were just promoting them to simplify our tablegen files.
---
lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 23 +++++++++++++++++++++++
lib/Target/R600/AMDGPUISelLowering.cpp | 3 ---
test/CodeGen/R600/misaligned-load.ll | 18 ++++++++++++++++++
3 files changed, 41 insertions(+), 3 deletions(-)
create mode 100644 test/CodeGen/R600/misaligned-load.ll
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index e0e8168..d7179c6 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -417,6 +417,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
N->getValueType(0), Ops);
}
+ case ISD::LOAD: {
+ // To simplify the TableGen patters, we replace all i64 loads with
+ // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
+ // during DAG legalization, however, so places (ExpandUnalignedLoad)
+ // in the DAG legalizer assume that if i64 is legal, so doing this
+ // promotion early can cause problems.
+ EVT VT = N->getValueType(0);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
+
+ SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::i64, NewLoad);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ SDValue(NewLoad.getNode(), 1));
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
+ SelectCode(NewLoad.getNode());
+ N = BitCast.getNode();
+ break;
+ }
+
case AMDGPUISD::REGISTER_LOAD: {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 7ce0c36..4868a0a 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -189,9 +189,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
- setOperationAction(ISD::LOAD, MVT::i64, Promote);
- AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
-
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll
new file mode 100644
index 0000000..6290ca0
--- /dev/null
+++ b/test/CodeGen/R600/misaligned-load.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI: @byte_aligned_load64
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: s_endpgm
+define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
+entry:
+ %0 = load i64 addrspace(3)* %in, align 1
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
--
1.8.5.5
More information about the llvm-commits
mailing list