[llvm] r345136 - [X86][SSE] Update PMULDQ schedule tests to survive more aggressive SimplifyDemandedBits
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 24 06:13:36 PDT 2018
Author: rksimon
Date: Wed Oct 24 06:13:36 2018
New Revision: 345136
URL: http://llvm.org/viewvc/llvm-project?rev=345136&view=rev
Log:
[X86][SSE] Update PMULDQ schedule tests to survive more aggressive SimplifyDemandedBits
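PMULDQ sign-extends and multiplies only the even 32-bit elements of its sources, so when one pmuldq result was bitcast back to a <8 x i32> and fed straight into a second pmuldq, a more aggressive SimplifyDemandedBits can prove that much of the first multiply is never read and simplify the chain away - removing the very instruction the schedule test is checking. The tests now give each pmuldq independent operands and keep both results live by combining them with an 'or'. In short, the avx2 variant changes from (IR taken from the diff below):

  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
  %2 = bitcast <4 x i64> %1 to <8 x i32>
  %3 = load <8 x i32>, <8 x i32> *%a2, align 32
  %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)

to:

  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
  %2 = load <8 x i32>, <8 x i32> *%a3, align 32
  %3 = call <2 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a2, <8 x i32> %2)
  %4 = or <4 x i64> %1, %3

The sse41 test gets the same treatment with <4 x i32>/<2 x i64> types.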
Modified:
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=345136&r1=345135&r2=345136&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed Oct 24 06:13:36 2018
@@ -4734,46 +4734,52 @@ define <4 x i64> @test_pmovzxwq(<8 x i16
ret <4 x i64> %6
}
-define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
+define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> *%a3) {
; GENERIC-LABEL: test_pmuldq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; GENERIC-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
+; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmuldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; HASWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
+; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmuldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
+; BROADWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:1.00]
+; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmuldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
+; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
+; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
+; SKX-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
+; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmuldq:
; ZNVER1: # %bb.0:
+; ZNVER1-NEXT: vpmuldq (%rdi), %ymm2, %ymm2 # sched: [11:1.00]
; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
+; ZNVER1-NEXT: vpor %ymm2, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
- %2 = bitcast <4 x i64> %1 to <8 x i32>
- %3 = load <8 x i32>, <8 x i32> *%a2, align 32
- %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)
+ %2 = load <8 x i32>, <8 x i32> *%a3, align 32
+ %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a2, <8 x i32> %2)
+ %4 = or <4 x i64> %1, %3
ret <4 x i64> %4
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=345136&r1=345135&r2=345136&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Wed Oct 24 06:13:36 2018
@@ -4704,106 +4704,122 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
ret <2 x i64> %5
}
-define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
+define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) {
; GENERIC-LABEL: test_pmuldq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; GENERIC-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
+; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pmuldq:
; SLM: # %bb.0:
+; SLM-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
; SLM-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
+; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmuldq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
+; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pmuldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
+; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pmuldq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pmuldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; HASWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
+; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pmuldq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmuldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
+; BROADWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:1.00]
+; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pmuldq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmuldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50]
+; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pmuldq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50]
+; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; SKX-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50]
+; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmuldq:
; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pmuldq:
; BTVER2: # %bb.0:
+; BTVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [7:1.00]
; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pmuldq:
; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pmuldq:
; ZNVER1: # %bb.0:
+; ZNVER1-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [11:1.00]
; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; ZNVER1-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
- %2 = bitcast <2 x i64> %1 to <4 x i32>
- %3 = load <4 x i32>, <4 x i32> *%a2, align 16
- %4 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %2, <4 x i32> %3)
+ %2 = load <4 x i32>, <4 x i32> *%a3, align 16
+ %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a2, <4 x i32> %2)
+ %4 = or <2 x i64> %1, %3
ret <2 x i64> %4
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
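(The CHECK lines in these schedule tests are normally autogenerated rather than hand-edited; assuming a standard trunk checkout, something like

  llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/avx2-schedule.ll llvm/test/CodeGen/X86/sse41-schedule.ll

should refresh them after changing the IR.)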