[llvm] r329593 - [X86] Revert the SLM part of r328914.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 9 10:07:40 PDT 2018


Author: ctopper
Date: Mon Apr  9 10:07:40 2018
New Revision: 329593

URL: http://llvm.org/viewvc/llvm-project?rev=329593&view=rev
Log:
[X86] Revert the SLM part of r328914.

While the reverted numbers appear to be correct based on Intel's optimization manual and Agner Fog's data, they caused performance regressions on a couple of the benchmarks in our internal list.

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/test/CodeGen/X86/slow-pmulld.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=329593&r1=329592&r2=329593&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Mon Apr  9 10:07:40 2018
@@ -145,7 +145,9 @@ defm : SLMWriteResPair<WriteVecShift, [S
 defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteVecALU,   [SLM_FPC_RSV01],  1>;
 defm : SLMWriteResPair<WriteVecIMul,  [SLM_FPC_RSV0],   4>;
-defm : SLMWriteResPair<WritePMULLD,  [SLM_FPC_RSV0],   11, [11], 7>;
+// FIXME: The below is closer to correct, but caused some perf regressions.
+//defm : SLMWriteResPair<WritePMULLD,  [SLM_FPC_RSV0],   11, [11], 7>;
+defm : SLMWriteResPair<WritePMULLD,  [SLM_FPC_RSV0],   4>;
 defm : SLMWriteResPair<WriteShuffle,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteBlend,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteMPSAD,  [SLM_FPC_RSV0],  7>;

Modified: llvm/trunk/test/CodeGen/X86/slow-pmulld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/slow-pmulld.ll?rev=329593&r1=329592&r2=329593&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/slow-pmulld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/slow-pmulld.ll Mon Apr  9 10:07:40 2018
@@ -1215,32 +1215,34 @@ define <8 x i32> @test_mul_v8i32_v8i16_m
 define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
 ; SLM32-LABEL: test_mul_v16i32_v16i16_minsize:
 ; SLM32:       # %bb.0:
-; SLM32-NEXT:    movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
-; SLM32-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SLM32-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM32-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM32-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
 ; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
-; SLM32-NEXT:    pmulld %xmm5, %xmm0
-; SLM32-NEXT:    pmulld %xmm5, %xmm2
-; SLM32-NEXT:    pmulld %xmm5, %xmm1
-; SLM32-NEXT:    pmulld %xmm5, %xmm3
+; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM32-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM32-NEXT:    movdqa {{.*#+}} xmm1 = [18778,18778,18778,18778]
+; SLM32-NEXT:    pmulld %xmm1, %xmm4
+; SLM32-NEXT:    pmulld %xmm1, %xmm0
+; SLM32-NEXT:    pmulld %xmm1, %xmm2
+; SLM32-NEXT:    pmulld %xmm1, %xmm3
+; SLM32-NEXT:    movdqa %xmm4, %xmm1
 ; SLM32-NEXT:    retl
 ;
 ; SLM64-LABEL: test_mul_v16i32_v16i16_minsize:
 ; SLM64:       # %bb.0:
-; SLM64-NEXT:    movdqa {{.*#+}} xmm5 = [18778,18778,18778,18778]
-; SLM64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SLM64-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
 ; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
-; SLM64-NEXT:    pmulld %xmm5, %xmm0
-; SLM64-NEXT:    pmulld %xmm5, %xmm2
-; SLM64-NEXT:    pmulld %xmm5, %xmm1
-; SLM64-NEXT:    pmulld %xmm5, %xmm3
+; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SLM64-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SLM64-NEXT:    movdqa {{.*#+}} xmm1 = [18778,18778,18778,18778]
+; SLM64-NEXT:    pmulld %xmm1, %xmm4
+; SLM64-NEXT:    pmulld %xmm1, %xmm0
+; SLM64-NEXT:    pmulld %xmm1, %xmm2
+; SLM64-NEXT:    pmulld %xmm1, %xmm3
+; SLM64-NEXT:    movdqa %xmm4, %xmm1
 ; SLM64-NEXT:    retq
 ;
 ; SLOW32-LABEL: test_mul_v16i32_v16i16_minsize:

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=329593&r1=329592&r2=329593&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Mon Apr  9 10:07:40 2018
@@ -4817,8 +4817,8 @@ define <4 x i32> @test_pmulld(<4 x i32>
 ;
 ; SLM-LABEL: test_pmulld:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    pmulld %xmm1, %xmm0 # sched: [11:11.00]
-; SLM-NEXT:    pmulld (%rdi), %xmm0 # sched: [14:11.00]
+; SLM-NEXT:    pmulld %xmm1, %xmm0 # sched: [4:1.00]
+; SLM-NEXT:    pmulld (%rdi), %xmm0 # sched: [7:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: test_pmulld:




More information about the llvm-commits mailing list