[PATCH] R600/SI: Add support for 64-bit loads

Tom Stellard tom at stellard.net
Thu Jun 27 13:52:10 PDT 2013


From: Tom Stellard <thomas.stellard at amd.com>

---
 lib/Target/R600/AMDGPUInstructions.td | 20 +++++++++++++++++
 lib/Target/R600/SIISelLowering.cpp    |  2 ++
 lib/Target/R600/SIInstructions.td     |  8 ++++++-
 test/CodeGen/R600/load.ll             | 42 +++++++++++++++++++++++++++++++++++
 test/CodeGen/R600/sra.ll              | 14 ++++++++++++
 5 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 29df374..35b91d3 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -86,6 +86,12 @@ def COND_NULL : PatLeaf <
 // Load/Store Pattern Fragments
 //===----------------------------------------------------------------------===//
 
+def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{  // unindexedload filtered by extension kind
+  LoadSDNode *L = cast<LoadSDNode>(N);
+  return L->getExtensionType() == ISD::ZEXTLOAD ||  // accepted: ZEXTLOAD ...
+         L->getExtensionType() == ISD::EXTLOAD;  // ... or EXTLOAD; every other extension type is rejected
+}]>;
+
 def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
     return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
@@ -94,6 +100,20 @@ def zextloadi8_constant : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
     return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{  // az_extload narrowed by memory type
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;  // matches only when the load's memory VT is i32
+}]>;
+
+def az_extloadi32_global : PatFrag<(ops node:$ptr),  // az_extloadi32 that additionally passes isGlobalLoad()
+                                   (az_extloadi32 node:$ptr), [{
+  return isGlobalLoad(cast<LoadSDNode>(N));  // cast<> (as in az_extloadi32): the result is used without a null check
+}]>;
+
+def az_extloadi32_constant : PatFrag<(ops node:$ptr),  // az_extloadi32 that additionally passes isConstantLoad()
+                                     (az_extloadi32 node:$ptr), [{
+  return isConstantLoad(cast<LoadSDNode>(N), -1);  // cast<>: result is used unchecked; meaning of -1 is defined by isConstantLoad (not visible here)
+}]>;
+
 class Constants {
 int TWO_PI = 0x40c90fdb;
 int PI = 0x40490fdb;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 9d4cfef..2fbccad 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -75,6 +75,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
+
   setTargetDAGCombine(ISD::SELECT_CC);
 
   setTargetDAGCombine(ISD::SETCC);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 9c96c08..bc5f758 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -994,7 +994,9 @@ def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64",
 def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64",
   [(set i64:$dst, (srl i64:$src0, i32:$src1))]
 >;
-def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", []>;
+def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64",  // definition now supplies a selection pattern
+  [(set i64:$dst, (sra i64:$src0, i32:$src1))]  // matches sra of an i64 value by an i32 amount, producing i64
+>;
 
 def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
 def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
@@ -1638,6 +1640,10 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
                           global_load, constant_load>;
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32,
                           zextloadi8_global, zextloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,  // i64 result through the plain global/constant load fragments
+                          global_load, constant_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,  // i64 result through the az_extloadi32 fragments (memory VT i32)
+                          az_extloadi32_global, az_extloadi32_constant>;  // NOTE(review): DWORDX2 name suggests a two-dword fetch for a 32-bit memory type; confirm the upper half is correct for zero-extension
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
                           global_load, constant_load>;
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index d1ebaa3..44c089b 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -65,3 +65,45 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
   store float %1, float addrspace(1)* %out
   ret void
 }
+
+; R600-CHECK: @load_i64
+; R600-CHECK: RAT
+; R600-CHECK: RAT
+
+; SI-CHECK: @load_i64
+; SI-CHECK: BUFFER_LOAD_DWORDX2
+define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {  ; copies one i64 from %in to %out, both in addrspace(1)
+entry:
+  %0 = load i64 addrspace(1)* %in  ; the non-extending 64-bit load under test
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @load_i64_sext
+; R600-CHECK: RAT
+; R600-CHECK: RAT
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
+; R600-CHECK: 31
+; SI-CHECK: @load_i64_sext
+; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:VGPR[0-9]_VGPR[0-9]]]
+; SI-CHECK: V_LSHL_B64 [[LSHL:VGPR[0-9]_VGPR[0-9]]], [[VAL]], 32
+; SI-CHECK: V_ASHR_I64 VGPR{{[0-9]}}_VGPR{{[0-9]}}, [[LSHL]], 32
+
+define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {  ; loads an i32, sign-extends it, stores the i64
+entry:
+  %0 = load i32 addrspace(1)* %in  ; 32-bit load feeding the extension
+  %1 = sext i32 %0 to i64  ; the checks above expect a shift-left / arithmetic-shift-right pair by 32 on SI
+  store i64 %1, i64 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @load_i64_zext
+; R600-CHECK: RAT
+; R600-CHECK: RAT
+define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {  ; loads an i32, zero-extends it, stores the i64
+entry:
+  %0 = load i32 addrspace(1)* %in  ; 32-bit load feeding the extension
+  %1 = zext i32 %0 to i64  ; NOTE(review): only R600 check lines exist for this function; SI output is not verified here
+  store i64 %1, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index 7c5cc75..5220a96 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -38,3 +38,17 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+;EG-CHECK: @ashr_i64
+;EG-CHECK: ASHR
+
+;SI-CHECK: @ashr_i64
+;SI-CHECK: V_ASHR_I64
+define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {  ; arithmetic right shift of a 64-bit value by a constant
+entry:
+  %0 = sext i32 %in to i64  ; widen the i32 argument to i64 so the shift operates on 64 bits
+  %1 = ashr i64 %0, 8  ; the i64 ashr under test, constant shift amount 8
+  store i64 %1, i64 addrspace(1)* %out
+  ret void
+}
+
-- 
1.7.11.4




More information about the llvm-commits mailing list