[PATCH] D39134: [X86][SSE] Add MOVHPSrm to domain tables
Simon Pilgrim via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 20 12:05:13 PDT 2017
RKSimon created this revision.
MOVHPSrm is also a shorter encoding than UNPCKLPDrm on SSE (it does not need the 0x66 prefix).
What should I do for the AVX512 cases? Do you want me to add an fadd stage to force the execution domain?
Repository:
rL LLVM
https://reviews.llvm.org/D39134
Files:
lib/Target/X86/X86InstrInfo.cpp
test/CodeGen/X86/avx512-shuffle-schedule.ll
test/CodeGen/X86/avx512-shuffles/unpack.ll
test/CodeGen/X86/sse2.ll
test/CodeGen/X86/vector-rem.ll
Index: test/CodeGen/X86/vector-rem.ll
===================================================================
--- test/CodeGen/X86/vector-rem.ll
+++ test/CodeGen/X86/vector-rem.ll
@@ -106,8 +106,8 @@
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movhps (%rsp), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0,1],mem[0,1]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
Index: test/CodeGen/X86/sse2.ll
===================================================================
--- test/CodeGen/X86/sse2.ll
+++ test/CodeGen/X86/sse2.ll
@@ -363,13 +363,13 @@
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movaps 96(%eax), %xmm0
-; X86-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; X86-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-NEXT: retl
;
; X64-LABEL: test16:
; X64: # BB#0:
; X64-NEXT: movaps 96(%rdi), %xmm0
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; X64-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X64-NEXT: retq
%i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
%i6 = load <4 x double>, <4 x double>* %i5, align 32
Index: test/CodeGen/X86/avx512-shuffles/unpack.ll
===================================================================
--- test/CodeGen/X86/avx512-shuffles/unpack.ll
+++ test/CodeGen/X86/avx512-shuffles/unpack.ll
@@ -826,7 +826,7 @@
define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) {
; CHECK-LABEL: test_2xdouble_unpack_low_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; CHECK-NEXT: retq
%vec2 = load <2 x double>, <2 x double>* %vec2p
%res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
Index: test/CodeGen/X86/avx512-shuffle-schedule.ll
===================================================================
--- test/CodeGen/X86/avx512-shuffle-schedule.ll
+++ test/CodeGen/X86/avx512-shuffle-schedule.ll
@@ -8998,7 +8998,7 @@
define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) {
; CHECK-LABEL: test_2xdouble_unpack_low_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
+; CHECK-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] sched: [6:1.00]
; CHECK-NEXT: retq # sched: [7:1.00]
%vec2 = load <2 x double>, <2 x double>* %vec2p
%res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -9443,7 +9443,7 @@
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
- { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
+ { X86::MOVHPSrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
{ X86::MOVLHPSrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
{ X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
{ X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
@@ -9471,7 +9471,7 @@
{ X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
- { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
+ { X86::VMOVHPSrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
{ X86::VMOVLHPSrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
{ X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
{ X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
@@ -9561,7 +9561,7 @@
{ X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
{ X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
{ X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
- { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
+ { X86::VMOVHPSZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
{ X86::VMOVLHPSZrr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
{ X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
{ X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D39134.119679.patch
Type: text/x-patch
Size: 4798 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171020/a8339ac0/attachment.bin>
More information about the llvm-commits
mailing list