[PATCH] R600: Fix extloads from i8 / i16 to i64.
Matt Arsenault
Matthew.Arsenault at amd.com
Wed Mar 5 20:19:43 PST 2014
This appears to only be working for global loads. Private and local break for other reasons.
http://llvm-reviews.chandlerc.com/D2974
Files:
include/llvm/CodeGen/ISDOpcodes.h
lib/CodeGen/SelectionDAG/SelectionDAG.cpp
lib/Target/R600/AMDGPUISelLowering.cpp
lib/Target/R600/SIISelLowering.cpp
test/CodeGen/R600/extload.ll
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -702,6 +702,8 @@
LAST_LOADEXT_TYPE
};
+ NodeType getExtForLoadExtType(LoadExtType);
+
//===--------------------------------------------------------------------===//
/// ISD::CondCode enum - These are ordered carefully to make the bitfields
/// below work out, when considering SETFALSE (something that never exists
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -231,6 +231,21 @@
return true;
}
+ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ return ISD::ANY_EXTEND;
+ case ISD::SEXTLOAD:
+ return ISD::SIGN_EXTEND;
+ case ISD::ZEXTLOAD:
+ return ISD::ZERO_EXTEND;
+ default:
+ break;
+ }
+
+ llvm_unreachable("Invalid LoadExtType");
+}
+
/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
/// when given the operation for (X op Y).
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
Index: lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.cpp
+++ lib/Target/R600/AMDGPUISelLowering.cpp
@@ -692,6 +692,20 @@
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
ISD::LoadExtType ExtType = Load->getExtensionType();
+ EVT VT = Op.getValueType();
+ EVT MemVT = Load->getMemoryVT();
+
+ if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
+ // We can do the extload to 32-bits, and then need to separately extend to
+ // 64-bits.
+
+ SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
+ Load->getChain(),
+ Load->getBasePtr(),
+ MemVT,
+ Load->getMemOperand());
+ return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
+ }
// Lower loads constant address space global variable loads
if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
@@ -711,8 +725,6 @@
return SDValue();
- EVT VT = Op.getValueType();
- EVT MemVT = Load->getMemoryVT();
unsigned Mask = 0;
if (Load->getMemoryVT() == MVT::i8) {
Mask = 0xff;
Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -128,6 +128,9 @@
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
Index: test/CodeGen/R600/extload.ll
===================================================================
--- test/CodeGen/R600/extload.ll
+++ test/CodeGen/R600/extload.ll
@@ -1,6 +1,7 @@
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; EG-LABEL: @anyext_load_i8:
+; FUNC-LABEL: @anyext_load_i8:
; EG: AND_INT
; EG: 255
define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
@@ -12,7 +13,7 @@
ret void
}
-; EG-LABEL: @anyext_load_i16:
+; FUNC-LABEL: @anyext_load_i16:
; EG: AND_INT
; EG: AND_INT
; EG-DAG: 65535
@@ -26,7 +27,7 @@
ret void
}
-; EG-LABEL: @anyext_load_lds_i8:
+; FUNC-LABEL: @anyext_load_lds_i8:
; EG: AND_INT
; EG: 255
define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
@@ -38,7 +39,7 @@
ret void
}
-; EG-LABEL: @anyext_load_lds_i16:
+; FUNC-LABEL: @anyext_load_lds_i16:
; EG: AND_INT
; EG: AND_INT
; EG-DAG: 65535
@@ -51,3 +52,69 @@
store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
ret void
}
+
+; FUNC-LABEL: @sextload_global_i8_to_i64
+; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: BUFFER_STORE_DWORDX2
+define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in, align 8
+ %ext = sext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @sextload_global_i16_to_i64
+; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: BUFFER_STORE_DWORDX2
+define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in, align 8
+ %ext = sext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @sextload_global_i32_to_i64
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: BUFFER_STORE_DWORDX2
+define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %a = load i32 addrspace(1)* %in, align 8
+ %ext = sext i32 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @zextload_global_i8_to_i64
+; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: BUFFER_STORE_DWORDX2
+define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in, align 8
+ %ext = zext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @zextload_global_i16_to_i64
+; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: BUFFER_STORE_DWORDX2
+define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in, align 8
+ %ext = zext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @zextload_global_i32_to_i64
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: BUFFER_STORE_DWORDX2
+define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %a = load i32 addrspace(1)* %in, align 8
+ %ext = zext i32 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2974.1.patch
Type: text/x-patch
Size: 6919 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140305/34c52f45/attachment.bin>
More information about the llvm-commits
mailing list