[PATCH] R600: Fix extloads from i8 / i16 to i64.
Tom Stellard
tom at stellard.net
Thu Mar 6 04:02:52 PST 2014
On Wed, Mar 05, 2014 at 08:19:43PM -0800, Matt Arsenault wrote:
> This appears to only be working for global loads. Private and local break for other reasons.
>
LGTM.
-Tom
>
> http://llvm-reviews.chandlerc.com/D2974
>
> Files:
> include/llvm/CodeGen/ISDOpcodes.h
> lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> lib/Target/R600/AMDGPUISelLowering.cpp
> lib/Target/R600/SIISelLowering.cpp
> test/CodeGen/R600/extload.ll
>
> Index: include/llvm/CodeGen/ISDOpcodes.h
> ===================================================================
> --- include/llvm/CodeGen/ISDOpcodes.h
> +++ include/llvm/CodeGen/ISDOpcodes.h
> @@ -702,6 +702,8 @@
> LAST_LOADEXT_TYPE
> };
>
> + NodeType getExtForLoadExtType(LoadExtType);
> +
> //===--------------------------------------------------------------------===//
> /// ISD::CondCode enum - These are ordered carefully to make the bitfields
> /// below work out, when considering SETFALSE (something that never exists
> Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -231,6 +231,21 @@
> return true;
> }
>
> +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
> + switch (ExtType) {
> + case ISD::EXTLOAD:
> + return ISD::ANY_EXTEND;
> + case ISD::SEXTLOAD:
> + return ISD::SIGN_EXTEND;
> + case ISD::ZEXTLOAD:
> + return ISD::ZERO_EXTEND;
> + default:
> + break;
> + }
> +
> + llvm_unreachable("Invalid LoadExtType");
> +}
> +
> /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
> /// when given the operation for (X op Y).
> ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -692,6 +692,20 @@
> SDLoc DL(Op);
> LoadSDNode *Load = cast<LoadSDNode>(Op);
> ISD::LoadExtType ExtType = Load->getExtensionType();
> + EVT VT = Op.getValueType();
> + EVT MemVT = Load->getMemoryVT();
> +
> + if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
> + // We can do the extload to 32-bits, and then need to separately extend to
> + // 64-bits.
> +
> + SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
> + Load->getChain(),
> + Load->getBasePtr(),
> + MemVT,
> + Load->getMemOperand());
> + return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
> + }
>
> // Lower loads constant address space global variable loads
> if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
> @@ -711,8 +725,6 @@
> return SDValue();
>
>
> - EVT VT = Op.getValueType();
> - EVT MemVT = Load->getMemoryVT();
> unsigned Mask = 0;
> if (Load->getMemoryVT() == MVT::i8) {
> Mask = 0xff;
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -128,6 +128,9 @@
> setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
> setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
> setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
> + setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
> + setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
> + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
> setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
> setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
>
> Index: test/CodeGen/R600/extload.ll
> ===================================================================
> --- test/CodeGen/R600/extload.ll
> +++ test/CodeGen/R600/extload.ll
> @@ -1,6 +1,7 @@
> -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
> +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
>
> -; EG-LABEL: @anyext_load_i8:
> +; FUNC-LABEL: @anyext_load_i8:
> ; EG: AND_INT
> ; EG: 255
> define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
> @@ -12,7 +13,7 @@
> ret void
> }
>
> -; EG-LABEL: @anyext_load_i16:
> +; FUNC-LABEL: @anyext_load_i16:
> ; EG: AND_INT
> ; EG: AND_INT
> ; EG-DAG: 65535
> @@ -26,7 +27,7 @@
> ret void
> }
>
> -; EG-LABEL: @anyext_load_lds_i8:
> +; FUNC-LABEL: @anyext_load_lds_i8:
> ; EG: AND_INT
> ; EG: 255
> define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
> @@ -38,7 +39,7 @@
> ret void
> }
>
> -; EG-LABEL: @anyext_load_lds_i16:
> +; FUNC-LABEL: @anyext_load_lds_i16:
> ; EG: AND_INT
> ; EG: AND_INT
> ; EG-DAG: 65535
> @@ -51,3 +52,69 @@
> store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
> ret void
> }
> +
> +; FUNC-LABEL: @sextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> + %a = load i8 addrspace(1)* %in, align 8
> + %ext = sext i8 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> + %a = load i16 addrspace(1)* %in, align 8
> + %ext = sext i16 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> + %a = load i32 addrspace(1)* %in, align 8
> + %ext = sext i32 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> + %a = load i8 addrspace(1)* %in, align 8
> + %ext = zext i8 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> + %a = load i16 addrspace(1)* %in, align 8
> + %ext = zext i16 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> + %a = load i32 addrspace(1)* %in, align 8
> + %ext = zext i32 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> Index: include/llvm/CodeGen/ISDOpcodes.h
> ===================================================================
> --- include/llvm/CodeGen/ISDOpcodes.h
> +++ include/llvm/CodeGen/ISDOpcodes.h
> @@ -702,6 +702,8 @@
> LAST_LOADEXT_TYPE
> };
>
> + NodeType getExtForLoadExtType(LoadExtType);
> +
> //===--------------------------------------------------------------------===//
> /// ISD::CondCode enum - These are ordered carefully to make the bitfields
> /// below work out, when considering SETFALSE (something that never exists
> Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -231,6 +231,21 @@
> return true;
> }
>
> +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
> + switch (ExtType) {
> + case ISD::EXTLOAD:
> + return ISD::ANY_EXTEND;
> + case ISD::SEXTLOAD:
> + return ISD::SIGN_EXTEND;
> + case ISD::ZEXTLOAD:
> + return ISD::ZERO_EXTEND;
> + default:
> + break;
> + }
> +
> + llvm_unreachable("Invalid LoadExtType");
> +}
> +
> /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
> /// when given the operation for (X op Y).
> ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -692,6 +692,20 @@
> SDLoc DL(Op);
> LoadSDNode *Load = cast<LoadSDNode>(Op);
> ISD::LoadExtType ExtType = Load->getExtensionType();
> + EVT VT = Op.getValueType();
> + EVT MemVT = Load->getMemoryVT();
> +
> + if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
> + // We can do the extload to 32-bits, and then need to separately extend to
> + // 64-bits.
> +
> + SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
> + Load->getChain(),
> + Load->getBasePtr(),
> + MemVT,
> + Load->getMemOperand());
> + return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
> + }
>
> // Lower loads constant address space global variable loads
> if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
> @@ -711,8 +725,6 @@
> return SDValue();
>
>
> - EVT VT = Op.getValueType();
> - EVT MemVT = Load->getMemoryVT();
> unsigned Mask = 0;
> if (Load->getMemoryVT() == MVT::i8) {
> Mask = 0xff;
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -128,6 +128,9 @@
> setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
> setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
> setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
> + setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
> + setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
> + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
> setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
> setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
>
> Index: test/CodeGen/R600/extload.ll
> ===================================================================
> --- test/CodeGen/R600/extload.ll
> +++ test/CodeGen/R600/extload.ll
> @@ -1,6 +1,7 @@
> -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
> +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
>
> -; EG-LABEL: @anyext_load_i8:
> +; FUNC-LABEL: @anyext_load_i8:
> ; EG: AND_INT
> ; EG: 255
> define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
> @@ -12,7 +13,7 @@
> ret void
> }
>
> -; EG-LABEL: @anyext_load_i16:
> +; FUNC-LABEL: @anyext_load_i16:
> ; EG: AND_INT
> ; EG: AND_INT
> ; EG-DAG: 65535
> @@ -26,7 +27,7 @@
> ret void
> }
>
> -; EG-LABEL: @anyext_load_lds_i8:
> +; FUNC-LABEL: @anyext_load_lds_i8:
> ; EG: AND_INT
> ; EG: 255
> define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
> @@ -38,7 +39,7 @@
> ret void
> }
>
> -; EG-LABEL: @anyext_load_lds_i16:
> +; FUNC-LABEL: @anyext_load_lds_i16:
> ; EG: AND_INT
> ; EG: AND_INT
> ; EG-DAG: 65535
> @@ -51,3 +52,69 @@
> store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
> ret void
> }
> +
> +; FUNC-LABEL: @sextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> + %a = load i8 addrspace(1)* %in, align 8
> + %ext = sext i8 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> + %a = load i16 addrspace(1)* %in, align 8
> + %ext = sext i16 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> + %a = load i32 addrspace(1)* %in, align 8
> + %ext = sext i32 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> + %a = load i8 addrspace(1)* %in, align 8
> + %ext = zext i8 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> + %a = load i16 addrspace(1)* %in, align 8
> + %ext = zext i16 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> + %a = load i32 addrspace(1)* %in, align 8
> + %ext = zext i32 %a to i64
> + store i64 %ext, i64 addrspace(1)* %out, align 8
> + ret void
> +}
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list