[llvm] r292651 - AMDGPU/R600: Serialize vector trunc stores to private AS
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 23 10:28:05 PST 2017
Matt: Ok to merge this?
On Sat, Jan 21, 2017 at 4:27 PM, Jan Vesely <jan.vesely at rutgers.edu> wrote:
> Hi Hans,
>
> I'd like this commit to be included in the 4.0 release.
> It fixes invalid code generation for sub-int vectors.
>
> thanks,
> Jan
>
> On Fri, 2017-01-20 at 21:24 +0000, Jan Vesely via llvm-commits wrote:
>> Author: jvesely
>> Date: Fri Jan 20 15:24:26 2017
>> New Revision: 292651
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=292651&view=rev
>> Log:
>> AMDGPU/R600: Serialize vector trunc stores to private AS
>>
>> Add DUMMY_CHAIN SDNode to denote stores of interest
>>
>> Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=28915
>> Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=30411
>>
>> Differential Revision: https://reviews.llvm.org/D27964
>>
>> Modified:
>> llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
>> llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
>> llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
>> llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
>> llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
>> llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=292651&r1=292650&r2=292651&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Jan 20 15:24:26 2017
>> @@ -3278,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTar
>> NODE_NAME_CASE(CONST_DATA_PTR)
>> NODE_NAME_CASE(PC_ADD_REL_OFFSET)
>> NODE_NAME_CASE(KILL)
>> + NODE_NAME_CASE(DUMMY_CHAIN)
>> case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
>> NODE_NAME_CASE(SENDMSG)
>> NODE_NAME_CASE(SENDMSGHALT)
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=292651&r1=292650&r2=292651&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Jan 20 15:24:26 2017
>> @@ -330,6 +330,7 @@ enum NodeType : unsigned {
>> INTERP_P2,
>> PC_ADD_REL_OFFSET,
>> KILL,
>> + DUMMY_CHAIN,
>> FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>> STORE_MSKOR,
>> LOAD_CONSTANT,
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td?rev=292651&r1=292650&r2=292651&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td Fri Jan 20 15:24:26 2017
>> @@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
>> // This argument to this node is a dword address.
>> def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
>>
>> +// Force dependencies for vector trunc stores
>> +def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
>> +
>> def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
>> def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
>>
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=292651&r1=292650&r2=292651&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Fri Jan 20 15:24:26 2017
>> @@ -1120,7 +1120,10 @@ SDValue R600TargetLowering::lowerPrivate
>> llvm_unreachable("Unsupported private trunc store");
>> }
>>
>> - SDValue Chain = Store->getChain();
>> + SDValue OldChain = Store->getChain();
>> + bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
>> + // Skip dummy
>> + SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
>> SDValue BasePtr = Store->getBasePtr();
>> SDValue Offset = Store->getOffset();
>> EVT MemVT = Store->getMemoryVT();
>> @@ -1176,7 +1179,15 @@ SDValue R600TargetLowering::lowerPrivate
>>
>> // Store dword
>> // TODO: Can we be smarter about MachinePointerInfo?
>> - return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
>> + SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
>> +
>> + // If we are part of expanded vector, make our neighbors depend on this store
>> + if (VectorTrunc) {
>> + // Make all other vector elements depend on this store
>> + Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
>> + DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
>> + }
>> + return NewStore;
>> }
>>
>> SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
>> @@ -1196,6 +1207,17 @@ SDValue R600TargetLowering::LowerSTORE(S
>> // Neither LOCAL nor PRIVATE can do vectors at the moment
>> if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
>> VT.isVector()) {
>> + if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
>> + // Add an extra level of chain to isolate this vector
>> + SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
>> + // TODO: can the chain be replaced without creating a new store?
>> + SDValue NewStore = DAG.getTruncStore(
>> + NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
>> + MemVT, StoreNode->getAlignment(),
>> + StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
>> + StoreNode = cast<StoreSDNode>(NewStore);
>> + }
>> +
>> return scalarizeVectorStore(StoreNode, DAG);
>> }
>>
>> @@ -1230,7 +1252,7 @@ SDValue R600TargetLowering::LowerSTORE(S
>> // Put the mask in correct place
>> SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
>>
>> - // Put the mask in correct place
>> + // Put the value bits in correct place
>> SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
>> SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
>>
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=292651&r1=292650&r2=292651&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Fri Jan 20 15:24:26 2017
>> @@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOO
>>
>> def MOV : R600_1OP <0x19, "MOV", []>;
>>
>> +
>> +// This is a hack to get rid of DUMMY_CHAIN nodes.
>> +// Most DUMMY_CHAINs should be eliminated during legalization, but undef
>> +// values can sneak in some to selection.
>> +let isPseudo = 1, isCodeGenOnly = 1 in {
>> +def DUMMY_CHAIN : AMDGPUInst <
>> + (outs),
>> + (ins),
>> + "DUMMY_CHAIN",
>> + [(R600dummy_chain)]
>> +>;
>> +} // end let isPseudo = 1, isCodeGenOnly = 1
>> +
>> +
>> let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
>>
>> class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
>>
>> Modified: llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll?rev=292651&r1=292650&r2=292651&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll (original)
>> +++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll Fri Jan 20 15:24:26 2017
>> @@ -708,10 +708,11 @@ define void @local_zextload_v4i8_to_v4i1
>> ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
>>
>> ; EG: LDS_READ_RET
>> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
>> +; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> -; EG-DAG: ASHR
>> ; EG: LDS_WRITE
>> ; EG: LDS_WRITE
>> define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
>> @@ -740,14 +741,15 @@ define void @local_zextload_v8i8_to_v8i1
>>
>> ; EG: LDS_READ_RET
>> ; EG: LDS_READ_RET
>> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
>> +; EG-DAG: BFE_INT
>> +; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> ; EG: LDS_WRITE
>> ; EG: LDS_WRITE
>> ; EG: LDS_WRITE
>> @@ -786,6 +788,11 @@ define void @local_zextload_v16i8_to_v16
>> ; EG: LDS_READ_RET
>> ; EG: LDS_READ_RET
>> ; EG: LDS_READ_RET
>> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
>> +; EG-DAG: BFE_INT
>> +; EG-DAG: BFE_INT
>> +; EG-DAG: BFE_INT
>> +; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> @@ -798,10 +805,6 @@ define void @local_zextload_v16i8_to_v16
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> ; EG: LDS_WRITE
>> ; EG: LDS_WRITE
>> ; EG: LDS_WRITE
>> @@ -860,6 +863,11 @@ define void @local_zextload_v32i8_to_v32
>> ; EG: LDS_READ_RET
>> ; EG: LDS_READ_RET
>> ; EG: LDS_READ_RET
>> +; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
>> +; EG-DAG: BFE_INT
>> +; EG-DAG: BFE_INT
>> +; EG-DAG: BFE_INT
>> +; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> @@ -884,14 +892,6 @@ define void @local_zextload_v32i8_to_v32
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> ; EG-DAG: BFE_INT
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> -; EG-DAG: ASHR
>> ; EG: LDS_WRITE
>> ; EG: LDS_WRITE
>> ; EG: LDS_WRITE
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list