[llvm] 5832950 - [AMDGPU/MemOpsCluster] Compute `width` for `MIMG` instruction class.
Your Name via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 23 05:02:41 PDT 2020
Author: hsmahesha
Date: 2020-06-23T17:32:17+05:30
New Revision: 5832950adbfcd7a5b16922a87a2cde257b7fac43
URL: https://github.com/llvm/llvm-project/commit/5832950adbfcd7a5b16922a87a2cde257b7fac43
DIFF: https://github.com/llvm/llvm-project/commit/5832950adbfcd7a5b16922a87a2cde257b7fac43.diff
LOG: [AMDGPU/MemOpsCluster] Compute `width` for `MIMG` instruction class.
Summary:
The `width` computation is missing for the newly added `MIMG`
instruction class. Add it.
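
As a rough sketch (condensed from the SIInstrInfo.cpp hunk below, with the
srsrc/vaddr base-operand handling elided), the MIMG case now derives `Width`
from the size of the instruction's `vdata` operand:

  // Condensed, excerpt-style sketch of SIInstrInfo::getMemOperandsWithOffsetWidth;
  // it relies on LLVM/AMDGPU internals and is not standalone compilable code.
  if (isMIMG(LdSt)) {
    // ... collect base operands (srsrc, vaddr / NSA address registers) ...
    Offset = 0;
    // Get appropriate operand, and compute width accordingly.
    int DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }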
Reviewers: foad, rampitec, arsenm
Reviewed By: foad
Subscribers: MatzeB, javed.absar, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81649
Added:
Modified:
llvm/lib/CodeGen/MachineScheduler.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/cluster_stores.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 0f21c97a30f6..cf75d531deb2 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1573,8 +1573,13 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
bool OffsetIsScalable;
unsigned Width;
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
- OffsetIsScalable, Width, TRI))
+ OffsetIsScalable, Width, TRI)) {
MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
+
+ LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+ << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+ << ", Width: " << Width << "\n");
+ }
#ifndef NDEBUG
for (auto *Op : BaseOps)
assert(Op);
@@ -1630,6 +1635,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
<< ")\n");
DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
}
+
+ LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
+ << ", Curr cluster bytes: " << CurrentClusterBytes
+ << "\n");
}
}
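
For reference, with an assertions-enabled build the two new LLVM_DEBUG
statements print lines of the following shape (the values shown here are
illustrative only, not taken from a real run):

  Num BaseOps: 1, Offset: 8, OffsetIsScalable: 0, Width: 4
   Curr cluster length: 2, Curr cluster bytes: 8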
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0dcaecd844e5..77bfd888e94b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -396,6 +396,9 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr));
}
Offset = 0;
+ // Get appropriate operand, and compute width accordingly.
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+ Width = getOpSize(LdSt, DataOpIdx);
return true;
}
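
The DBG lines added to cluster_stores.ll below match this debug output, so
they only fire on builds with assertions enabled. A hedged example of the kind
of RUN line that surfaces it (the actual RUN lines in the test may use
different targets, CPUs, and check prefixes):

  ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs \
  ; RUN:   -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefixes=CHECK,DBG %s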
diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
index 67d6f686f459..bc3bcfe6089a 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
@@ -5,6 +5,14 @@
; CHECK-LABEL: {{^}}cluster_load_cluster_store:
define amdgpu_kernel void @cluster_load_cluster_store(i32* noalias %lb, i32* noalias %sb) {
bb:
+
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+
; DBG: Cluster ld/st SU(1) - SU(2)
; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]])
@@ -45,6 +53,13 @@ bb:
; CHECK-LABEL: {{^}}cluster_load_valu_cluster_store:
define amdgpu_kernel void @cluster_load_valu_cluster_store(i32* noalias %lb, i32* noalias %sb) {
bb:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+
; DBG: Cluster ld/st SU(1) - SU(2)
; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]])
@@ -86,6 +101,8 @@ bb:
; Cluster loads from the same texture with different coordinates
; CHECK-LABEL: {{^}}cluster_image_load:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]]
; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_LOAD
; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_LOAD
@@ -106,6 +123,9 @@ entry:
; Don't cluster loads from different textures
; CHECK-LABEL: {{^}}no_cluster_image_load:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
; DBG-NOT: {{^}}Cluster ld/st
define amdgpu_ps void @no_cluster_image_load(<8 x i32> inreg %src1, <8 x i32> inreg %src2, <8 x i32> inreg %dst, i32 %x, i32 %y) {
entry:
@@ -118,6 +138,8 @@ entry:
; Cluster loads from the same texture and sampler with different coordinates
; CHECK-LABEL: {{^}}cluster_image_sample:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]]
; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_SAMPLE
; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_SAMPLE