[llvm] r327505 - [X86][SSE] Use WriteFShuffleLd for MOVDDUP/MOVSHDUP/MOVSLDUP reg-mem instructions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 14 06:22:56 PDT 2018


Author: rksimon
Date: Wed Mar 14 06:22:56 2018
New Revision: 327505

URL: http://llvm.org/viewvc/llvm-project?rev=327505&view=rev
Log:
[X86][SSE] Use WriteFShuffleLd for MOVDDUP/MOVSHDUP/MOVSLDUP reg-mem instructions

They shouldn't be treated as pure loads.

Found while investigating D44428

Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse3-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327505&r1=327504&r2=327505&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Mar 14 06:22:56 2018
@@ -4708,6 +4708,7 @@ let AddedComplexity = 20 in {
 //===---------------------------------------------------------------------===//
 // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
 //===---------------------------------------------------------------------===//
+
 multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                               ValueType vt, RegisterClass RC, PatFrag mem_frag,
                               X86MemOperand x86memop> {
@@ -4718,7 +4719,7 @@ def rr : S3SI<op, MRMSrcReg, (outs RC:$d
 def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                       [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
-                      IIC_SSE_MOV_LH>, Sched<[WriteLoad]>;
+                      IIC_SSE_MOV_LH>, Sched<[WriteFShuffleLd]>;
 }
 
 let Predicates = [HasAVX, NoVLX] in {
@@ -4786,10 +4787,10 @@ def rm  : S3DI<0x12, MRMSrcMem, (outs VR
                     [(set VR128:$dst,
                       (v2f64 (X86Movddup
                               (scalar_to_vector (loadf64 addr:$src)))))],
-                              IIC_SSE_MOV_LH>, Sched<[WriteLoad]>;
+                              IIC_SSE_MOV_LH>, Sched<[WriteFShuffleLd]>;
 }
 
-// FIXME: Merge with above classe when there're patterns for the ymm version
+// FIXME: Merge with above classes when there are patterns for the ymm version
 multiclass sse3_replicate_dfp_y<string OpcodeStr> {
 def rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -4799,7 +4800,7 @@ def rm  : S3DI<0x12, MRMSrcMem, (outs VR
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                       (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
-                    Sched<[WriteLoad]>;
+                    Sched<[WriteFShuffleLd]>;
 }
 
 let Predicates = [HasAVX, NoVLX] in {

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327505&r1=327504&r2=327505&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Wed Mar 14 06:22:56 2018
@@ -2690,7 +2690,7 @@ define <4 x double> @test_movddup(<4 x d
 ;
 ; BTVER2-LABEL: test_movddup:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [5:1.00]
+; BTVER2-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:1.00]
 ; BTVER2-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50]
 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -3030,7 +3030,7 @@ define <8 x float> @test_movshdup(<8 x f
 ;
 ; BTVER2-LABEL: test_movshdup:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [5:1.00]
+; BTVER2-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:1.00]
 ; BTVER2-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50]
 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -3093,7 +3093,7 @@ define <8 x float> @test_movsldup(<8 x f
 ;
 ; BTVER2-LABEL: test_movsldup:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [5:1.00]
+; BTVER2-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:1.00]
 ; BTVER2-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50]
 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]

Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=327505&r1=327504&r2=327505&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Wed Mar 14 06:22:56 2018
@@ -563,9 +563,10 @@ define <2 x double> @test_movddup(<2 x d
 ;
 ; SLM-LABEL: test_movddup:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; SLM-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [3:1.00]
-; SLM-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
+; SLM-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
+; SLM-NEXT:    subpd %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movddup:
@@ -605,7 +606,7 @@ define <2 x double> @test_movddup(<2 x d
 ;
 ; BTVER2-LABEL: test_movddup:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:1.00]
+; BTVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
 ; BTVER2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
 ; BTVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -641,9 +642,10 @@ define <4 x float> @test_movshdup(<4 x f
 ;
 ; SLM-LABEL: test_movshdup:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; SLM-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [3:1.00]
-; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
+; SLM-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
+; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movshdup:
@@ -683,7 +685,7 @@ define <4 x float> @test_movshdup(<4 x f
 ;
 ; BTVER2-LABEL: test_movshdup:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:1.00]
+; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
 ; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
@@ -719,9 +721,10 @@ define <4 x float> @test_movsldup(<4 x f
 ;
 ; SLM-LABEL: test_movsldup:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; SLM-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [3:1.00]
-; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT:    movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
+; SLM-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
+; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
+; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movsldup:
@@ -761,7 +764,7 @@ define <4 x float> @test_movsldup(<4 x f
 ;
 ; BTVER2-LABEL: test_movsldup:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:1.00]
+; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
 ; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]




More information about the llvm-commits mailing list