[PATCH] D39402: [X86] Prevent fast isel from folding loads into the instructions listed in hasPartialRegUpdate.

Sun Oct 29 21:55:18 PDT 2017

craig.topper updated this revision to Diff 120769.
craig.topper added a comment.

Rebase after function reordering.


https://reviews.llvm.org/D39402

Files:
  lib/Target/X86/X86InstrInfo.cpp
  test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
  test/CodeGen/X86/fast-isel-int-float-conversion.ll


Index: test/CodeGen/X86/fast-isel-int-float-conversion.ll
===================================================================
--- test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -21,7 +21,8 @@
 define double @int_to_double_rm(i32* %a) {
 ; SSE2-LABEL: int_to_double_rm:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT:    movl (%rdi), %eax
+; SSE2-NEXT:    cvtsi2sdl %eax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: int_to_double_rm:
@@ -52,7 +53,8 @@
 define float @int_to_float_rm(i32* %a) {
 ; SSE2-LABEL: int_to_float_rm:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT:    movl (%rdi), %eax
+; SSE2-NEXT:    cvtsi2ssl %eax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: int_to_float_rm:
Index: test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
===================================================================
--- test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
+++ test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
@@ -55,7 +55,8 @@
 define double @single_to_double_rm(float* %x) {
 ; SSE-LABEL: single_to_double_rm:
 ; SSE:       # BB#0: # %entry
-; SSE-NEXT:    cvtss2sd (%rdi), %xmm0
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    cvtss2sd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: single_to_double_rm:
@@ -72,7 +73,8 @@
 define float @double_to_single_rm(double* %x) {
 ; SSE-LABEL: double_to_single_rm:
 ; SSE:       # BB#0: # %entry
-; SSE-NEXT:    cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    cvtsd2ss %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: double_to_single_rm:
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -8400,6 +8400,10 @@
        MI.getOpcode() == X86::PUSH64r))
     return nullptr;
 
+  // Avoid partial register update stalls unless optimizing for size.
+  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
+    return nullptr;
+
   unsigned NumOps = MI.getDesc().getNumOperands();
   bool isTwoAddr =
       NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -8563,11 +8567,6 @@
   if (NoFusing)
     return nullptr;
 
-  // Unless optimizing for size, don't fold to avoid partial
-  // register update stalls
-  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
-    return nullptr;
-
   // Don't fold subreg spills, or reloads that use a high subreg.
   for (auto Op : Ops) {
     MachineOperand &MO = MI.getOperand(Op);
@@ -8762,10 +8761,6 @@
   // Check switch flag
   if (NoFusing) return nullptr;
 
-  // Avoid partial register update stalls unless optimizing for size.
-  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
-    return nullptr;
-
   // Determine the alignment of the load.
   unsigned Alignment = 0;
   if (LoadMI.hasOneMemOperand())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D39402.120769.patch
Type: text/x-patch
Size: 3036 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171030/321734c0/attachment.bin>