[PATCH] D56161: [AMDGPU] Fix scalar operand folding bug that causes SHOC performance regression
Alexander via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 3 11:59:16 PST 2019
This revision was automatically updated to reflect the committed changes.
Closed by commit rL350350: [AMDGPU] Fix scalar operand folding bug that causes SHOC performance regression. (authored by alex-t, committed by ).
Changed prior to commit:
https://reviews.llvm.org/D56161?vs=179737&id=180118#toc
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D56161/new/
https://reviews.llvm.org/D56161
Files:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
Index: llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -131,10 +131,10 @@
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
@@ -166,10 +166,10 @@
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
@@ -246,7 +246,7 @@
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
@@ -288,7 +288,7 @@
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
Index: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -854,13 +854,17 @@
}
} else {
// Folding register.
+ SmallVector <MachineRegisterInfo::use_iterator, 4> UsesToProcess;
for (MachineRegisterInfo::use_iterator
Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
Use != E; ++Use) {
- MachineInstr *UseMI = Use->getParent();
+ UsesToProcess.push_back(Use);
+ }
+ for (auto U : UsesToProcess) {
+ MachineInstr *UseMI = U->getParent();
- foldOperand(OpToFold, UseMI, Use.getOperandNo(),
- FoldList, CopiesToReplace);
+ foldOperand(OpToFold, UseMI, U.getOperandNo(),
+ FoldList, CopiesToReplace);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D56161.180118.patch
Type: text/x-patch
Size: 3915 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190103/af150074/attachment.bin>
More information about the llvm-commits
mailing list