[llvm] r353016 - Recommit r352660 "[X86] Mark EMMS and FEMMS as clobbering MM0-7 and ST0-7."

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 3 20:44:20 PST 2019


Author: ctopper
Date: Sun Feb  3 20:44:20 2019
New Revision: 353016

URL: http://llvm.org/viewvc/llvm-project?rev=353016&view=rev
Log:
Recommit r352660 "[X86] Mark EMMS and FEMMS as clobbering MM0-7 and ST0-7."

We now print ST0 as 'st' when generating the clobber list for MS inline assembly in Clang. This matches the spelling that the GCC register name list expects.

Original commit message:

This fixes the test case in PR35982 by preventing MMX instructions that read MM0-7 from being moved below EMMS/FEMMS by the post-RA scheduler.

As discussed in Bugzilla, however, this is not a complete fix: such instructions can still be reordered at the IR level or by the pre-RA scheduler.

Differential Revision: https://reviews.llvm.org/D57298

Modified:
    llvm/trunk/lib/Target/X86/X86Instr3DNow.td
    llvm/trunk/lib/Target/X86/X86InstrMMX.td
    llvm/trunk/test/CodeGen/X86/pr35982.ll

Modified: llvm/trunk/lib/Target/X86/X86Instr3DNow.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr3DNow.td?rev=353016&r1=353015&r2=353016&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Instr3DNow.td (original)
+++ llvm/trunk/lib/Target/X86/X86Instr3DNow.td Sun Feb  3 20:44:20 2019
@@ -73,7 +73,9 @@ defm PFSUBR   : I3DNow_binop_rm_int<0xAA
 defm PI2FD    : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
 defm PMULHRW  : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
 
-let SchedRW = [WriteEMMS] in
+let SchedRW = [WriteEMMS],
+    Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
                    [(int_x86_mmx_femms)]>, TB;
 

Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=353016&r1=353015&r2=353016&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Sun Feb  3 20:44:20 2019
@@ -152,7 +152,9 @@ multiclass sse12_cvt_pint_3addr<bits<8>
 // MMX EMMS Instruction
 //===----------------------------------------------------------------------===//
 
-let SchedRW = [WriteEMMS] in
+let SchedRW = [WriteEMMS],
+    Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
 def MMX_EMMS  : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/X86/pr35982.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr35982.ll?rev=353016&r1=353015&r2=353016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr35982.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr35982.ll Sun Feb  3 20:44:20 2019
@@ -3,49 +3,27 @@
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=true | FileCheck %s --check-prefixes=CHECK,POST
 
 define float @PR35982_emms(<1 x i64>) nounwind {
-; NOPOST-LABEL: PR35982_emms:
-; NOPOST:       # %bb.0:
-; NOPOST-NEXT:    pushl %ebp
-; NOPOST-NEXT:    movl %esp, %ebp
-; NOPOST-NEXT:    andl $-8, %esp
-; NOPOST-NEXT:    subl $16, %esp
-; NOPOST-NEXT:    movl 8(%ebp), %eax
-; NOPOST-NEXT:    movl 12(%ebp), %ecx
-; NOPOST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; NOPOST-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; NOPOST-NEXT:    movd %mm0, %ecx
-; NOPOST-NEXT:    emms
-; NOPOST-NEXT:    movl %eax, (%esp)
-; NOPOST-NEXT:    fildl (%esp)
-; NOPOST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    fiaddl {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    movl %ebp, %esp
-; NOPOST-NEXT:    popl %ebp
-; NOPOST-NEXT:    retl
-;
-; POST-LABEL: PR35982_emms:
-; POST:       # %bb.0:
-; POST-NEXT:    pushl %ebp
-; POST-NEXT:    movl %esp, %ebp
-; POST-NEXT:    andl $-8, %esp
-; POST-NEXT:    subl $16, %esp
-; POST-NEXT:    movl 8(%ebp), %eax
-; POST-NEXT:    movl 12(%ebp), %ecx
-; POST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; POST-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; POST-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; POST-NEXT:    emms
-; POST-NEXT:    movl %eax, (%esp)
-; POST-NEXT:    fildl (%esp)
-; POST-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; POST-NEXT:    movd %mm0, %ecx
-; POST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; POST-NEXT:    fiaddl {{[0-9]+}}(%esp)
-; POST-NEXT:    movl %ebp, %esp
-; POST-NEXT:    popl %ebp
-; POST-NEXT:    retl
+; CHECK-LABEL: PR35982_emms:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    andl $-8, %esp
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    movl 8(%ebp), %eax
+; CHECK-NEXT:    movl 12(%ebp), %ecx
+; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movq {{[0-9]+}}(%esp), %mm0
+; CHECK-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
+; CHECK-NEXT:    movd %mm0, %ecx
+; CHECK-NEXT:    emms
+; CHECK-NEXT:    movl %eax, (%esp)
+; CHECK-NEXT:    fildl (%esp)
+; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fiaddl {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %ebp, %esp
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
   %2 = bitcast <1 x i64> %0 to <2 x i32>
   %3 = extractelement <2 x i32> %2, i32 0
   %4 = extractelement <1 x i64> %0, i32 0
@@ -61,49 +39,27 @@ define float @PR35982_emms(<1 x i64>) no
 }
 
 define float @PR35982_femms(<1 x i64>) nounwind {
-; NOPOST-LABEL: PR35982_femms:
-; NOPOST:       # %bb.0:
-; NOPOST-NEXT:    pushl %ebp
-; NOPOST-NEXT:    movl %esp, %ebp
-; NOPOST-NEXT:    andl $-8, %esp
-; NOPOST-NEXT:    subl $16, %esp
-; NOPOST-NEXT:    movl 8(%ebp), %eax
-; NOPOST-NEXT:    movl 12(%ebp), %ecx
-; NOPOST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; NOPOST-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; NOPOST-NEXT:    movd %mm0, %ecx
-; NOPOST-NEXT:    femms
-; NOPOST-NEXT:    movl %eax, (%esp)
-; NOPOST-NEXT:    fildl (%esp)
-; NOPOST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    fiaddl {{[0-9]+}}(%esp)
-; NOPOST-NEXT:    movl %ebp, %esp
-; NOPOST-NEXT:    popl %ebp
-; NOPOST-NEXT:    retl
-;
-; POST-LABEL: PR35982_femms:
-; POST:       # %bb.0:
-; POST-NEXT:    pushl %ebp
-; POST-NEXT:    movl %esp, %ebp
-; POST-NEXT:    andl $-8, %esp
-; POST-NEXT:    subl $16, %esp
-; POST-NEXT:    movl 8(%ebp), %eax
-; POST-NEXT:    movl 12(%ebp), %ecx
-; POST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; POST-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; POST-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; POST-NEXT:    femms
-; POST-NEXT:    movl %eax, (%esp)
-; POST-NEXT:    fildl (%esp)
-; POST-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; POST-NEXT:    movd %mm0, %ecx
-; POST-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; POST-NEXT:    fiaddl {{[0-9]+}}(%esp)
-; POST-NEXT:    movl %ebp, %esp
-; POST-NEXT:    popl %ebp
-; POST-NEXT:    retl
+; CHECK-LABEL: PR35982_femms:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    andl $-8, %esp
+; CHECK-NEXT:    subl $16, %esp
+; CHECK-NEXT:    movl 8(%ebp), %eax
+; CHECK-NEXT:    movl 12(%ebp), %ecx
+; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movq {{[0-9]+}}(%esp), %mm0
+; CHECK-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
+; CHECK-NEXT:    movd %mm0, %ecx
+; CHECK-NEXT:    femms
+; CHECK-NEXT:    movl %eax, (%esp)
+; CHECK-NEXT:    fildl (%esp)
+; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fiaddl {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %ebp, %esp
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl
   %2 = bitcast <1 x i64> %0 to <2 x i32>
   %3 = extractelement <2 x i32> %2, i32 0
   %4 = extractelement <1 x i64> %0, i32 0




More information about the llvm-commits mailing list