[PATCH] R600: Fix extloads from i8 / i16 to i64.

Tom Stellard tom at stellard.net
Thu Mar 6 04:02:52 PST 2014


On Wed, Mar 05, 2014 at 08:19:43PM -0800, Matt Arsenault wrote:
> This appears to work only for global loads. Private and local loads break for other reasons.
> 

LGTM.

-Tom

> 
> http://llvm-reviews.chandlerc.com/D2974
> 
> Files:
>   include/llvm/CodeGen/ISDOpcodes.h
>   lib/CodeGen/SelectionDAG/SelectionDAG.cpp
>   lib/Target/R600/AMDGPUISelLowering.cpp
>   lib/Target/R600/SIISelLowering.cpp
>   test/CodeGen/R600/extload.ll
> 
> Index: include/llvm/CodeGen/ISDOpcodes.h
> ===================================================================
> --- include/llvm/CodeGen/ISDOpcodes.h
> +++ include/llvm/CodeGen/ISDOpcodes.h
> @@ -702,6 +702,8 @@
>      LAST_LOADEXT_TYPE
>    };
>  
> +  NodeType getExtForLoadExtType(LoadExtType);
> +
>    //===--------------------------------------------------------------------===//
>    /// ISD::CondCode enum - These are ordered carefully to make the bitfields
>    /// below work out, when considering SETFALSE (something that never exists
> Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -231,6 +231,21 @@
>    return true;
>  }
>  
> +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
> +  switch (ExtType) {
> +  case ISD::EXTLOAD:
> +    return ISD::ANY_EXTEND;
> +  case ISD::SEXTLOAD:
> +    return ISD::SIGN_EXTEND;
> +  case ISD::ZEXTLOAD:
> +    return ISD::ZERO_EXTEND;
> +  default:
> +    break;
> +  }
> +
> +  llvm_unreachable("Invalid LoadExtType");
> +}
> +
>  /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
>  /// when given the operation for (X op Y).
>  ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -692,6 +692,20 @@
>    SDLoc DL(Op);
>    LoadSDNode *Load = cast<LoadSDNode>(Op);
>    ISD::LoadExtType ExtType = Load->getExtensionType();
> +  EVT VT = Op.getValueType();
> +  EVT MemVT = Load->getMemoryVT();
> +
> +  if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
> +    // We can do the extload to 32-bits, and then need to separately extend to
> +    // 64-bits.
> +
> +    SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
> +                                       Load->getChain(),
> +                                       Load->getBasePtr(),
> +                                       MemVT,
> +                                       Load->getMemOperand());
> +    return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
> +  }
>  
>    // Lower loads constant address space global variable loads
>    if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
> @@ -711,8 +725,6 @@
>      return SDValue();
>  
>  
> -  EVT VT = Op.getValueType();
> -  EVT MemVT = Load->getMemoryVT();
>    unsigned Mask = 0;
>    if (Load->getMemoryVT() == MVT::i8) {
>      Mask = 0xff;
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -128,6 +128,9 @@
>    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
>  
> Index: test/CodeGen/R600/extload.ll
> ===================================================================
> --- test/CodeGen/R600/extload.ll
> +++ test/CodeGen/R600/extload.ll
> @@ -1,6 +1,7 @@
> -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
> +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
>  
> -; EG-LABEL: @anyext_load_i8:
> +; FUNC-LABEL: @anyext_load_i8:
>  ; EG: AND_INT
>  ; EG: 255
>  define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
> @@ -12,7 +13,7 @@
>    ret void
>  }
>  
> -; EG-LABEL: @anyext_load_i16:
> +; FUNC-LABEL: @anyext_load_i16:
>  ; EG: AND_INT
>  ; EG: AND_INT
>  ; EG-DAG: 65535
> @@ -26,7 +27,7 @@
>    ret void
>  }
>  
> -; EG-LABEL: @anyext_load_lds_i8:
> +; FUNC-LABEL: @anyext_load_lds_i8:
>  ; EG: AND_INT
>  ; EG: 255
>  define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
> @@ -38,7 +39,7 @@
>    ret void
>  }
>  
> -; EG-LABEL: @anyext_load_lds_i16:
> +; FUNC-LABEL: @anyext_load_lds_i16:
>  ; EG: AND_INT
>  ; EG: AND_INT
>  ; EG-DAG: 65535
> @@ -51,3 +52,69 @@
>    store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
>    ret void
>  }
> +
> +; FUNC-LABEL: @sextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> +  %a = load i8 addrspace(1)* %in, align 8
> +  %ext = sext i8 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> +  %a = load i16 addrspace(1)* %in, align 8
> +  %ext = sext i16 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> +  %a = load i32 addrspace(1)* %in, align 8
> +  %ext = sext i32 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> +  %a = load i8 addrspace(1)* %in, align 8
> +  %ext = zext i8 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> +  %a = load i16 addrspace(1)* %in, align 8
> +  %ext = zext i16 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> +  %a = load i32 addrspace(1)* %in, align 8
> +  %ext = zext i32 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}

> Index: include/llvm/CodeGen/ISDOpcodes.h
> ===================================================================
> --- include/llvm/CodeGen/ISDOpcodes.h
> +++ include/llvm/CodeGen/ISDOpcodes.h
> @@ -702,6 +702,8 @@
>      LAST_LOADEXT_TYPE
>    };
>  
> +  NodeType getExtForLoadExtType(LoadExtType);
> +
>    //===--------------------------------------------------------------------===//
>    /// ISD::CondCode enum - These are ordered carefully to make the bitfields
>    /// below work out, when considering SETFALSE (something that never exists
> Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -231,6 +231,21 @@
>    return true;
>  }
>  
> +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
> +  switch (ExtType) {
> +  case ISD::EXTLOAD:
> +    return ISD::ANY_EXTEND;
> +  case ISD::SEXTLOAD:
> +    return ISD::SIGN_EXTEND;
> +  case ISD::ZEXTLOAD:
> +    return ISD::ZERO_EXTEND;
> +  default:
> +    break;
> +  }
> +
> +  llvm_unreachable("Invalid LoadExtType");
> +}
> +
>  /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
>  /// when given the operation for (X op Y).
>  ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -692,6 +692,20 @@
>    SDLoc DL(Op);
>    LoadSDNode *Load = cast<LoadSDNode>(Op);
>    ISD::LoadExtType ExtType = Load->getExtensionType();
> +  EVT VT = Op.getValueType();
> +  EVT MemVT = Load->getMemoryVT();
> +
> +  if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
> +    // We can do the extload to 32-bits, and then need to separately extend to
> +    // 64-bits.
> +
> +    SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
> +                                       Load->getChain(),
> +                                       Load->getBasePtr(),
> +                                       MemVT,
> +                                       Load->getMemOperand());
> +    return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
> +  }
>  
>    // Lower loads constant address space global variable loads
>    if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
> @@ -711,8 +725,6 @@
>      return SDValue();
>  
>  
> -  EVT VT = Op.getValueType();
> -  EVT MemVT = Load->getMemoryVT();
>    unsigned Mask = 0;
>    if (Load->getMemoryVT() == MVT::i8) {
>      Mask = 0xff;
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -128,6 +128,9 @@
>    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
>    setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
>  
> Index: test/CodeGen/R600/extload.ll
> ===================================================================
> --- test/CodeGen/R600/extload.ll
> +++ test/CodeGen/R600/extload.ll
> @@ -1,6 +1,7 @@
> -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
> +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
>  
> -; EG-LABEL: @anyext_load_i8:
> +; FUNC-LABEL: @anyext_load_i8:
>  ; EG: AND_INT
>  ; EG: 255
>  define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
> @@ -12,7 +13,7 @@
>    ret void
>  }
>  
> -; EG-LABEL: @anyext_load_i16:
> +; FUNC-LABEL: @anyext_load_i16:
>  ; EG: AND_INT
>  ; EG: AND_INT
>  ; EG-DAG: 65535
> @@ -26,7 +27,7 @@
>    ret void
>  }
>  
> -; EG-LABEL: @anyext_load_lds_i8:
> +; FUNC-LABEL: @anyext_load_lds_i8:
>  ; EG: AND_INT
>  ; EG: 255
>  define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
> @@ -38,7 +39,7 @@
>    ret void
>  }
>  
> -; EG-LABEL: @anyext_load_lds_i16:
> +; FUNC-LABEL: @anyext_load_lds_i16:
>  ; EG: AND_INT
>  ; EG: AND_INT
>  ; EG-DAG: 65535
> @@ -51,3 +52,69 @@
>    store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
>    ret void
>  }
> +
> +; FUNC-LABEL: @sextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> +  %a = load i8 addrspace(1)* %in, align 8
> +  %ext = sext i8 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> +  %a = load i16 addrspace(1)* %in, align 8
> +  %ext = sext i16 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @sextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
> +; SI: BUFFER_STORE_DWORDX2
> +define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> +  %a = load i32 addrspace(1)* %in, align 8
> +  %ext = sext i32 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i8_to_i64
> +; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
> +  %a = load i8 addrspace(1)* %in, align 8
> +  %ext = zext i8 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i16_to_i64
> +; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
> +  %a = load i16 addrspace(1)* %in, align 8
> +  %ext = zext i16 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @zextload_global_i32_to_i64
> +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI: BUFFER_STORE_DWORDX2
> +define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> +  %a = load i32 addrspace(1)* %in, align 8
> +  %ext = zext i32 %a to i64
> +  store i64 %ext, i64 addrspace(1)* %out, align 8
> +  ret void
> +}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list