[PATCH] D56161: [AMDGPU] Fix scalar operand folding.

Sun Dec 30 11:18:20 PST 2018

alex-t created this revision.
alex-t added reviewers: rampitec, ronlieb.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.

SIFoldOperands::foldInstOperand  iterate over the operand uses calling the function that changes the def-use iterators on the way.  As a result it is able to fold to the very first use instruction only. As soon as def-use chain changed loop exits.
Proposed fix: collect uses to separate container and process them further.

Tests:  CodeGen/AMDGPU


https://reviews.llvm.org/D56161

Files:
  lib/Target/AMDGPU/SIFoldOperands.cpp
  test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll


Index: test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
===================================================================

--- test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -131,10 +131,10 @@
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
@@ -166,10 +166,10 @@
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
@@ -246,7 +246,7 @@
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
 ; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
@@ -288,7 +288,7 @@
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
 ; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -854,13 +854,17 @@
     }
   } else {
     // Folding register.
+    SmallVector <MachineRegisterInfo::use_iterator, 4> UsesToProcess;
     for (MachineRegisterInfo::use_iterator
            Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
          Use != E; ++Use) {
-      MachineInstr *UseMI = Use->getParent();
+      UsesToProcess.push_back(Use);
+    }
+    for (auto U : UsesToProcess) {
+      MachineInstr *UseMI = U->getParent();
 
-      foldOperand(OpToFold, UseMI, Use.getOperandNo(),
-                  FoldList, CopiesToReplace);
+      foldOperand(OpToFold, UseMI, U.getOperandNo(),
+        FoldList, CopiesToReplace);
     }
   }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D56161.179737.patch
Type: text/x-patch
Size: 3849 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181230/277ff031/attachment.bin>