[llvm] r292651 - AMDGPU/R600: Serialize vector trunc stores to private AS

Jan Vesely via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 21 16:27:40 PST 2017


Hi Hans,

I'd like this commit to be included in the 4.0 release.
It fixes invalid code generation for sub-int vectors.

thanks,
Jan

On Fri, 2017-01-20 at 21:24 +0000, Jan Vesely via llvm-commits wrote:
> Author: jvesely
> Date: Fri Jan 20 15:24:26 2017
> New Revision: 292651
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=292651&view=rev
> Log:
> AMDGPU/R600: Serialize vector trunc stores to private AS
> 
> Add DUMMY_CHAIN SDNode to denote stores of interest
> 
> Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=28915
> Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=30411
> 
> Differential Revision: https://reviews.llvm.org/D27964
> 
> Modified:
>     llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
>     llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
>     llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
>     llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
>     llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
>     llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll
> 
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=292651&r1=292650&r2=292651&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Jan 20 15:24:26 2017
> @@ -3278,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTar
>    NODE_NAME_CASE(CONST_DATA_PTR)
>    NODE_NAME_CASE(PC_ADD_REL_OFFSET)
>    NODE_NAME_CASE(KILL)
> +  NODE_NAME_CASE(DUMMY_CHAIN)
>    case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
>    NODE_NAME_CASE(SENDMSG)
>    NODE_NAME_CASE(SENDMSGHALT)
> 
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=292651&r1=292650&r2=292651&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Jan 20 15:24:26 2017
> @@ -330,6 +330,7 @@ enum NodeType : unsigned {
>    INTERP_P2,
>    PC_ADD_REL_OFFSET,
>    KILL,
> +  DUMMY_CHAIN,
>    FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>    STORE_MSKOR,
>    LOAD_CONSTANT,
> 
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td?rev=292651&r1=292650&r2=292651&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td Fri Jan 20 15:24:26 2017
> @@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
>  // This argument to this node is a dword address.
>  def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
>  
> +// Force dependencies for vector trunc stores
> +def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
> +
>  def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
>  def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
>  
> 
> Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=292651&r1=292650&r2=292651&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Fri Jan 20 15:24:26 2017
> @@ -1120,7 +1120,10 @@ SDValue R600TargetLowering::lowerPrivate
>      llvm_unreachable("Unsupported private trunc store");
>    }
>  
> -  SDValue Chain = Store->getChain();
> +  SDValue OldChain = Store->getChain();
> +  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
> +  // Skip dummy
> +  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
>    SDValue BasePtr = Store->getBasePtr();
>    SDValue Offset = Store->getOffset();
>    EVT MemVT = Store->getMemoryVT();
> @@ -1176,7 +1179,15 @@ SDValue R600TargetLowering::lowerPrivate
>  
>    // Store dword
>    // TODO: Can we be smarter about MachinePointerInfo?
> -  return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
> +  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
> +
> +  // If we are part of expanded vector, make our neighbors depend on this store
> +  if (VectorTrunc) {
> +    // Make all other vector elements depend on this store
> +    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
> +    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
> +  }
> +  return NewStore;
>  }
>  
>  SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
> @@ -1196,6 +1207,17 @@ SDValue R600TargetLowering::LowerSTORE(S
>    // Neither LOCAL nor PRIVATE can do vectors at the moment
>    if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
>        VT.isVector()) {
> +    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
> +      // Add an extra level of chain to isolate this vector
> +      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
> +      // TODO: can the chain be replaced without creating a new store?
> +      SDValue NewStore = DAG.getTruncStore(
> +          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
> +          MemVT, StoreNode->getAlignment(),
> +          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
> +      StoreNode = cast<StoreSDNode>(NewStore);
> +    }
> +
>      return scalarizeVectorStore(StoreNode, DAG);
>    }
>  
> @@ -1230,7 +1252,7 @@ SDValue R600TargetLowering::LowerSTORE(S
>        // Put the mask in correct place
>        SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
>  
> -      // Put the mask in correct place
> +      // Put the value bits in correct place
>        SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
>        SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
>  
> 
> Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=292651&r1=292650&r2=292651&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Fri Jan 20 15:24:26 2017
> @@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOO
>  
>  def MOV : R600_1OP <0x19, "MOV", []>;
>  
> +
> +// This is a hack to get rid of DUMMY_CHAIN nodes.
> +// Most DUMMY_CHAINs should be eliminated during legalization, but undef
> +// values can sneak in some to selection.
> +let isPseudo = 1, isCodeGenOnly = 1 in {
> +def DUMMY_CHAIN : AMDGPUInst <
> +  (outs),
> +  (ins),
> +  "DUMMY_CHAIN",
> +  [(R600dummy_chain)]
> +>;
> +} // end let isPseudo = 1, isCodeGenOnly = 1
> +
> +
>  let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
>  
>  class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll?rev=292651&r1=292650&r2=292651&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll Fri Jan 20 15:24:26 2017
> @@ -708,10 +708,11 @@ define void @local_zextload_v4i8_to_v4i1
>  ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
>  
>  ; EG: LDS_READ_RET
> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
> +; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
> -; EG-DAG: ASHR
>  ; EG: LDS_WRITE
>  ; EG: LDS_WRITE
>  define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
> @@ -740,14 +741,15 @@ define void @local_zextload_v8i8_to_v8i1
>  
>  ; EG: LDS_READ_RET
>  ; EG: LDS_READ_RET
> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
> +; EG-DAG: BFE_INT
> +; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
>  ; EG: LDS_WRITE
>  ; EG: LDS_WRITE
>  ; EG: LDS_WRITE
> @@ -786,6 +788,11 @@ define void @local_zextload_v16i8_to_v16
>  ; EG: LDS_READ_RET
>  ; EG: LDS_READ_RET
>  ; EG: LDS_READ_RET
> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
> +; EG-DAG: BFE_INT
> +; EG-DAG: BFE_INT
> +; EG-DAG: BFE_INT
> +; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
> @@ -798,10 +805,6 @@ define void @local_zextload_v16i8_to_v16
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
>  ; EG: LDS_WRITE
>  ; EG: LDS_WRITE
>  ; EG: LDS_WRITE
> @@ -860,6 +863,11 @@ define void @local_zextload_v32i8_to_v32
>  ; EG: LDS_READ_RET
>  ; EG: LDS_READ_RET
>  ; EG: LDS_READ_RET
> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
> +; EG-DAG: BFE_INT
> +; EG-DAG: BFE_INT
> +; EG-DAG: BFE_INT
> +; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
> @@ -884,14 +892,6 @@ define void @local_zextload_v32i8_to_v32
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
>  ; EG-DAG: BFE_INT
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
> -; EG-DAG: ASHR
>  ; EG: LDS_WRITE
>  ; EG: LDS_WRITE
>  ; EG: LDS_WRITE
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170121/52b7cb6b/attachment.sig>


More information about the llvm-commits mailing list