[PATCH] D56454: AMDGPU: Adjust the chain for loads writing to the HI part of a register.
Changpeng Fang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 8 14:19:41 PST 2019
cfang created this revision.
cfang added reviewers: rampitec, arsenm.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Loads that write to the HI part of a register should be chained to the op that writes to the LO part
of the register, so that the appropriate order between the two writes is maintained.
https://reviews.llvm.org/D56454
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
test/CodeGen/AMDGPU/chain-hi-to-lo.ll
Index: test/CodeGen/AMDGPU/chain-hi-to-lo.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}chain_hi_to_lo:
+; GCN: buffer_load_ushort [[DST:v[0-9]+]], off, [[RSRC:s\[[0-9]+:[0-9]+\]]], [[SOFF:s[0-9]+]] offset:2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_short_d16_hi [[DST]], off, [[RSRC]], [[SOFF]]
+
+define amdgpu_kernel void @chain_hi_to_lo() {
+bb:
+ %loads = load <2 x half>, <2 x half> addrspace(5)* null, align 2
+ %shuffled = shufflevector <2 x half> %loads, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+ br label %bb1
+
+bb1:
+ call void asm sideeffect "; use $0", "v"(<2 x half> %shuffled)
+ br label %bb1
+}
+
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8998,6 +8998,42 @@
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
+ case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
+ case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
+ case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
+ case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
+ case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
+ case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN: {
+ // For these loads that write to the HI part of a register,
+ // we should chain them to the op that writes to the LO part
+ // of the register to maintain the order.
+ unsigned NumOps = Node->getNumOperands();
+ SDValue OldChain = Node->getOperand(NumOps-1);
+
+ if (OldChain.getValueType() != MVT::Other)
+ break;
+
+ // Look for the new chain that should replace the old one.
+ SDValue Lo = Node->getOperand(NumOps-2);
+ SDNode *LoNode = Lo.getNode();
+ if (LoNode->getNumValues() == 1 ||
+ LoNode->getValueType(LoNode->getNumValues() - 1) != MVT::Other)
+ break;
+
+ SDValue NewChain = Lo.getValue(LoNode->getNumValues() - 1);
+ if (NewChain == OldChain) // Already replaced.
+ break;
+
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned I = 0; I < NumOps-1; ++I)
+ Ops.push_back(Node->getOperand(I));
+ // Replace the chain.
+ Ops.push_back(NewChain);
+ MachineSDNode *NewNode = DAG.getMachineNode(Opcode, SDLoc(Node),
+ Node->getVTList(), Ops);
+ DAG.setNodeMemRefs(NewNode, Node->memoperands());
+ return NewNode;
+ }
default:
break;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D56454.180737.patch
Type: text/x-patch
Size: 2651 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190108/560cb9d2/attachment-0001.bin>
More information about the llvm-commits
mailing list