[PATCH] D20839: AMDGPU: Add amdgpu-ps-wqm-outputs function attributes
Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 7 14:44:05 PDT 2016
This revision was automatically updated to reflect the committed changes.
Closed by commit rL272063: AMDGPU: Add amdgpu-ps-wqm-outputs function attributes (authored by nha).
Changed prior to commit:
http://reviews.llvm.org/D20839?vs=59195&id=59955#toc
Repository:
rL LLVM
http://reviews.llvm.org/D20839
Files:
llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
Index: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -154,14 +154,15 @@
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
std::vector<WorkItem> &Worklist) {
char GlobalFlags = 0;
+ bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
MachineInstr &MI = *II;
unsigned Opcode = MI.getOpcode();
- char Flags;
+ char Flags = 0;
if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
Flags = StateWQM;
@@ -175,15 +176,39 @@
ExecExports.push_back(&MI);
} else if (Opcode == AMDGPU::SI_PS_LIVE) {
LiveMaskQueries.push_back(&MI);
+ } else if (WQMOutputs) {
+ // The function is in machine SSA form, which means that physical
+ // VGPRs correspond to shader inputs and outputs. Inputs are
+ // only used, outputs are only defined.
+ for (const MachineOperand &MO : MI.defs()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+
+ if (!TRI->isVirtualRegister(Reg) &&
+ TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
+ Flags = StateWQM;
+ break;
+ }
+ }
}
- continue;
+ if (!Flags)
+ continue;
}
Instructions[&MI].Needs = Flags;
Worklist.push_back(&MI);
GlobalFlags |= Flags;
}
+
+ if (WQMOutputs && MBB.succ_empty()) {
+ // This is a prolog shader. Make sure we go back to exact mode at the end.
+ Blocks[&MBB].OutNeeds = StateExact;
+ Worklist.push_back(&MBB);
+ GlobalFlags |= StateExact;
+ }
}
return GlobalFlags;
Index: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
@@ -332,6 +332,19 @@
ret <4 x float> %tex
}
+; Check prolog shaders.
+;
+; CHECK-LABEL: {{^}}test_prolog_1:
+; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: v_add_f32_e32 v0,
+; CHECK: s_and_b64 exec, exec, [[ORIG]]
+define amdgpu_ps float @test_prolog_1(float %a, float %b) #4 {
+main_body:
+ %s = fadd float %a, %b
+ ret float %s
+}
+
declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
@@ -345,3 +358,4 @@
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
attributes #3 = { nounwind readnone }
+attributes #4 = { "amdgpu-ps-wqm-outputs" }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20839.59955.patch
Type: text/x-patch
Size: 3031 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160607/c219cd33/attachment.bin>
More information about the llvm-commits
mailing list