[PATCH] D47383: [AMDGPU] Avoid using divergent value in mubuf addr64 descriptor
Tim Renouf via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri May 25 11:33:48 PDT 2018
tpr created this revision.
Herald added subscribers: llvm-commits, t-tye, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl, arsenm.
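This fixes a problem where a load from a global address plus an index
(global+idx) generated incorrect code on gfx7 and earlier (<=gfx7) when the
index is divergent: the divergent value could end up in the mubuf addr64
descriptor.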
Change-Id: Ib4d177d6254b1dd3f8ec0203fdddec94bd8bc5ed
Repository:
rL LLVM
https://reviews.llvm.org/D47383
Files:
lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll
Index: test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SICI %s
+
+; GCN-LABEL: {{^}}main:
+; GCN-NOT: readfirstlane
+; SICI: buffer_load_dwordx4 {{.*}} addr64
+
+@indexable = internal unnamed_addr addrspace(1) constant [6 x <3 x float>] [<3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0.000000e+00>]
+
+define amdgpu_ps float @main(i32 %arg18) {
+.entry:
+ %tmp31 = sext i32 %arg18 to i64
+ %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31
+ %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
+ %tmp34 = extractelement <3 x float> %tmp33, i32 0
+ ret float %tmp34
+}
+
Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -993,6 +993,21 @@
return true;
}
+
/// Build a 64-bit immediate as a REG_SEQUENCE machine node.
///
/// The low and high 32-bit halves of \p Val are each emitted as an
/// S_MOV_B32 of a target constant, then combined as sub0 and sub1 of a
/// REG_SEQUENCE whose register-class operand is SGPR_64.
///
/// \param DAG DAG in which all nodes are created.
/// \param DL  Location passed to every node and constant created here.
/// \param Val 64-bit value to split across the two moves.
/// \returns the REG_SEQUENCE node, created with result type MVT::v2i32.
// NOTE(review): the callers in SelectMUBUF assign result 0 of this node to
// Ptr; confirm that consumers of Ptr accept a v2i32-typed value.
static MachineSDNode *buildSMovImm64(SelectionDAG &DAG, const SDLoc &DL,
 uint64_t Val) {
 // Split Val into two 32-bit target constants (low half, then high half).
 SDValue ValLo = DAG.getTargetConstant(Val & 0xffffffffU, DL, MVT::i32);
 SDValue ValHi = DAG.getTargetConstant(Val >> 32, DL, MVT::i32);
 // REG_SEQUENCE operands: register class ID first, then (value, subreg
 // index) pairs -- the low move goes to sub0 and the high move to sub1.
 const SDValue Ops0[] = {
 DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
 SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, ValLo), 0),
 DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
 SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, ValHi), 0),
 DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
 };
 return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v2i32, Ops0);
}
+
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &Offen,
@@ -1026,8 +1041,22 @@
SDValue N2 = N0.getOperand(0);
SDValue N3 = N0.getOperand(1);
Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
- Ptr = N2;
- VAddr = N3;
+ if (N2->isDivergent()) {
+ if (N3->isDivergent()) {
+ // Both N2 and N3 are divergent. Keep the add and use N2+N3 as the
+ // vaddr, and construct the resource out of 0.
+ Ptr = SDValue(buildSMovImm64(*CurDAG, DL, 0), 0);
+ VAddr = N0;
+ } else {
+ // N2 is divergent, N3 is not.
+ Ptr = N3;
+ VAddr = N2;
+ }
+ } else {
+ // N2 is not divergent.
+ Ptr = N2;
+ VAddr = N3;
+ }
} else {
// (add N0, C1) -> offset
VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
@@ -1054,8 +1083,23 @@
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
- Ptr = N0;
- VAddr = N1;
+
+ if (N0->isDivergent()) {
+ if (N1->isDivergent()) {
+ // Both N0 and N1 are divergent. Use the result of the add as the
+ // addr64, and construct the resource from a 0 address.
+ Ptr = SDValue(buildSMovImm64(*CurDAG, DL, 0), 0);
+ VAddr = Addr;
+ } else {
+ // N0 is divergent, N1 is not.
+ Ptr = N1;
+ VAddr = N0;
+ }
+ } else {
+ // N0 is not divergent.
+ Ptr = N0;
+ VAddr = N1;
+ }
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D47383.148641.patch
Type: text/x-patch
Size: 4034 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180525/d05488e8/attachment.bin>
More information about the llvm-commits
mailing list