[PATCH] R600: Fix extloads from i8 / i16 to i64.

Matt Arsenault Matthew.Arsenault at amd.com
Wed Mar 5 20:19:43 PST 2014


This appears to only be working for global loads. Private and local break for other reasons.


http://llvm-reviews.chandlerc.com/D2974

Files:
  include/llvm/CodeGen/ISDOpcodes.h
  lib/CodeGen/SelectionDAG/SelectionDAG.cpp
  lib/Target/R600/AMDGPUISelLowering.cpp
  lib/Target/R600/SIISelLowering.cpp
  test/CodeGen/R600/extload.ll

Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -702,6 +702,8 @@
     LAST_LOADEXT_TYPE
   };
 
+  NodeType getExtForLoadExtType(LoadExtType);
+
   //===--------------------------------------------------------------------===//
   /// ISD::CondCode enum - These are ordered carefully to make the bitfields
   /// below work out, when considering SETFALSE (something that never exists
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -231,6 +231,21 @@
   return true;
 }
 
+ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+  switch (ExtType) {
+  case ISD::EXTLOAD:
+    return ISD::ANY_EXTEND;
+  case ISD::SEXTLOAD:
+    return ISD::SIGN_EXTEND;
+  case ISD::ZEXTLOAD:
+    return ISD::ZERO_EXTEND;
+  default:
+    break;
+  }
+
+  llvm_unreachable("Invalid LoadExtType");
+}
+
 /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
 /// when given the operation for (X op Y).
 ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
Index: lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.cpp
+++ lib/Target/R600/AMDGPUISelLowering.cpp
@@ -692,6 +692,20 @@
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
   ISD::LoadExtType ExtType = Load->getExtensionType();
+  EVT VT = Op.getValueType();
+  EVT MemVT = Load->getMemoryVT();
+
+  if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
+    // We can do the extload to 32-bits, and then need to separately extend to
+    // 64-bits.
+
+    SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
+                                       Load->getChain(),
+                                       Load->getBasePtr(),
+                                       MemVT,
+                                       Load->getMemOperand());
+    return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
+  }
 
   // Lower loads constant address space global variable loads
   if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
@@ -711,8 +725,6 @@
     return SDValue();
 
 
-  EVT VT = Op.getValueType();
-  EVT MemVT = Load->getMemoryVT();
   unsigned Mask = 0;
   if (Load->getMemoryVT() == MVT::i8) {
     Mask = 0xff;
Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -128,6 +128,9 @@
   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
   setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
 
Index: test/CodeGen/R600/extload.ll
===================================================================
--- test/CodeGen/R600/extload.ll
+++ test/CodeGen/R600/extload.ll
@@ -1,6 +1,7 @@
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; EG-LABEL: @anyext_load_i8:
+; FUNC-LABEL: @anyext_load_i8:
 ; EG: AND_INT
 ; EG: 255
 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
@@ -12,7 +13,7 @@
   ret void
 }
 
-; EG-LABEL: @anyext_load_i16:
+; FUNC-LABEL: @anyext_load_i16:
 ; EG: AND_INT
 ; EG: AND_INT
 ; EG-DAG: 65535
@@ -26,7 +27,7 @@
   ret void
 }
 
-; EG-LABEL: @anyext_load_lds_i8:
+; FUNC-LABEL: @anyext_load_lds_i8:
 ; EG: AND_INT
 ; EG: 255
 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
@@ -38,7 +39,7 @@
   ret void
 }
 
-; EG-LABEL: @anyext_load_lds_i16:
+; FUNC-LABEL: @anyext_load_lds_i16:
 ; EG: AND_INT
 ; EG: AND_INT
 ; EG-DAG: 65535
@@ -51,3 +52,69 @@
   store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
   ret void
 }
+
+; FUNC-LABEL: @sextload_global_i8_to_i64
+; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: BUFFER_STORE_DWORDX2
+define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+  %a = load i8 addrspace(1)* %in, align 8
+  %ext = sext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @sextload_global_i16_to_i64
+; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: BUFFER_STORE_DWORDX2
+define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+  %a = load i16 addrspace(1)* %in, align 8
+  %ext = sext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @sextload_global_i32_to_i64
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: BUFFER_STORE_DWORDX2
+define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %a = load i32 addrspace(1)* %in, align 8
+  %ext = sext i32 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @zextload_global_i8_to_i64
+; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: BUFFER_STORE_DWORDX2
+define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+  %a = load i8 addrspace(1)* %in, align 8
+  %ext = zext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @zextload_global_i16_to_i64
+; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: BUFFER_STORE_DWORDX2
+define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+  %a = load i16 addrspace(1)* %in, align 8
+  %ext = zext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @zextload_global_i32_to_i64
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: BUFFER_STORE_DWORDX2
+define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %a = load i32 addrspace(1)* %in, align 8
+  %ext = zext i32 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2974.1.patch
Type: text/x-patch
Size: 6919 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140305/34c52f45/attachment.bin>


More information about the llvm-commits mailing list