[llvm-branch-commits] [llvm-branch] r293118 - Merging r292651:

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 25 16:26:37 PST 2017


Author: hans
Date: Wed Jan 25 18:26:36 2017
New Revision: 293118

URL: http://llvm.org/viewvc/llvm-project?rev=293118&view=rev
Log:
Merging r292651:
------------------------------------------------------------------------
r292651 | jvesely | 2017-01-20 13:24:26 -0800 (Fri, 20 Jan 2017) | 8 lines

AMDGPU/R600: Serialize vector trunc stores to private AS

Add DUMMY_CHAIN SDNode to denote stores of interest

Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=28915
Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=30411

Differential Revision: https://reviews.llvm.org/D27964
------------------------------------------------------------------------

Modified:
    llvm/branches/release_40/   (props changed)
    llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.h
    llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUInstrInfo.td
    llvm/branches/release_40/lib/Target/AMDGPU/R600ISelLowering.cpp
    llvm/branches/release_40/lib/Target/AMDGPU/R600Instructions.td
    llvm/branches/release_40/test/CodeGen/AMDGPU/load-local-i8.ll

Propchange: llvm/branches/release_40/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Jan 25 18:26:36 2017
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,291858-291859,291863,291875,291909,291966,291968,291979,292133,292242,292254-292255,292280,292323,292444,292467,292583,292625,292641,292667,292711,292758,293025
+/llvm/trunk:155241,291858-291859,291863,291875,291909,291966,291968,291979,292133,292242,292254-292255,292280,292323,292444,292467,292583,292625,292641,292651,292667,292711,292758,293025

Modified: llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=293118&r1=293117&r2=293118&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Jan 25 18:26:36 2017
@@ -3272,6 +3272,7 @@ const char* AMDGPUTargetLowering::getTar
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
   NODE_NAME_CASE(KILL)
+  NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
   NODE_NAME_CASE(SENDMSGHALT)

Modified: llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=293118&r1=293117&r2=293118&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUISelLowering.h Wed Jan 25 18:26:36 2017
@@ -320,6 +320,7 @@ enum NodeType : unsigned {
   INTERP_P2,
   PC_ADD_REL_OFFSET,
   KILL,
+  DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,

Modified: llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUInstrInfo.td?rev=293118&r1=293117&r2=293118&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUInstrInfo.td (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUInstrInfo.td Wed Jan 25 18:26:36 2017
@@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
 // This argument to this node is a dword address.
 def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
 def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
 def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
 

Modified: llvm/branches/release_40/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=293118&r1=293117&r2=293118&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/R600ISelLowering.cpp Wed Jan 25 18:26:36 2017
@@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivate
     llvm_unreachable("Unsupported private trunc store");
   }
 
-  SDValue Chain = Store->getChain();
+  SDValue OldChain = Store->getChain();
+  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+  // Skip dummy
+  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
   SDValue BasePtr = Store->getBasePtr();
   SDValue Offset = Store->getOffset();
   EVT MemVT = Store->getMemoryVT();
@@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivate
 
   // Store dword
   // TODO: Can we be smarter about MachinePointerInfo?
-  return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+  // If we are part of expanded vector, make our neighbors depend on this store
+  if (VectorTrunc) {
+    // Make all other vector elements depend on this store
+    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+  }
+  return NewStore;
 }
 
 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(S
   // Neither LOCAL nor PRIVATE can do vectors at the moment
   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
       VT.isVector()) {
+    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+      // Add an extra level of chain to isolate this vector
+      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+      // TODO: can the chain be replaced without creating a new store?
+      SDValue NewStore = DAG.getTruncStore(
+          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
+          MemVT, StoreNode->getAlignment(),
+          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+      StoreNode = cast<StoreSDNode>(NewStore);
+    }
+
     return scalarizeVectorStore(StoreNode, DAG);
   }
 
@@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(S
       // Put the mask in correct place
       SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
 
-      // Put the mask in correct place
+      // Put the value bits in correct place
       SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
       SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
 

Modified: llvm/branches/release_40/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/R600Instructions.td?rev=293118&r1=293117&r2=293118&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/R600Instructions.td Wed Jan 25 18:26:36 2017
@@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOO
 
 def MOV : R600_1OP <0x19, "MOV", []>;
 
+
+// This is a hack to get rid of DUMMY_CHAIN nodes.
+// Most DUMMY_CHAINs should be eliminated during legalization, but undef
+// values can sneak in some to selection.
+let isPseudo = 1, isCodeGenOnly = 1 in {
+def DUMMY_CHAIN : AMDGPUInst <
+  (outs),
+  (ins),
+  "DUMMY_CHAIN",
+  [(R600dummy_chain)]
+>;
+} // end let isPseudo = 1, isCodeGenOnly = 1
+
+
 let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
 
 class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <

Modified: llvm/branches/release_40/test/CodeGen/AMDGPU/load-local-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/test/CodeGen/AMDGPU/load-local-i8.ll?rev=293118&r1=293117&r2=293118&view=diff
==============================================================================
--- llvm/branches/release_40/test/CodeGen/AMDGPU/load-local-i8.ll (original)
+++ llvm/branches/release_40/test/CodeGen/AMDGPU/load-local-i8.ll Wed Jan 25 18:26:36 2017
@@ -708,10 +708,11 @@ define void @local_zextload_v4i8_to_v4i1
 ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
 
 ; EG: LDS_READ_RET
+; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
+; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
-; EG-DAG: ASHR
 ; EG: LDS_WRITE
 ; EG: LDS_WRITE
 define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
@@ -740,14 +741,15 @@ define void @local_zextload_v8i8_to_v8i1
 
 ; EG: LDS_READ_RET
 ; EG: LDS_READ_RET
+; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
-; EG-DAG: ASHR
-; EG-DAG: ASHR
 ; EG: LDS_WRITE
 ; EG: LDS_WRITE
 ; EG: LDS_WRITE
@@ -786,6 +788,11 @@ define void @local_zextload_v16i8_to_v16
 ; EG: LDS_READ_RET
 ; EG: LDS_READ_RET
 ; EG: LDS_READ_RET
+; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
@@ -798,10 +805,6 @@ define void @local_zextload_v16i8_to_v16
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
 ; EG: LDS_WRITE
 ; EG: LDS_WRITE
 ; EG: LDS_WRITE
@@ -860,6 +863,11 @@ define void @local_zextload_v32i8_to_v32
 ; EG: LDS_READ_RET
 ; EG: LDS_READ_RET
 ; EG: LDS_READ_RET
+; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
@@ -884,14 +892,6 @@ define void @local_zextload_v32i8_to_v32
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
 ; EG-DAG: BFE_INT
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
-; EG-DAG: ASHR
 ; EG: LDS_WRITE
 ; EG: LDS_WRITE
 ; EG: LDS_WRITE




More information about the llvm-branch-commits mailing list