[llvm] r365292 - [X86] Make movsd commutable to shufpd with a 0x02 immediate on pre-SSE4.1 targets.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 7 23:52:44 PDT 2019
Author: ctopper
Date: Sun Jul 7 23:52:43 2019
New Revision: 365292
URL: http://llvm.org/viewvc/llvm-project?rev=365292&view=rev
Log:
[X86] Make movsd commutable to shufpd with a 0x02 immediate on pre-SSE4.1 targets.
This can help avoid a copy or enable load folding.
On SSE4.1 targets we can commute it to a blend with immediate (BLENDPD) instead.
I had to make shufpd with a 0x02 immediate commutable as well
since we expect commuting to be reversible.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
llvm/trunk/test/CodeGen/X86/psubus.ll
llvm/trunk/test/CodeGen/X86/sdiv-exact.ll
llvm/trunk/test/CodeGen/X86/sse2.ll
llvm/trunk/test/CodeGen/X86/vector-blend.ll
llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/trunk/test/CodeGen/X86/vselect-2.ll
llvm/trunk/test/CodeGen/X86/vselect.ll
llvm/trunk/test/CodeGen/X86/x86-shifts.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Jul 7 23:52:43 2019
@@ -1542,20 +1542,39 @@ MachineInstr *X86InstrInfo::commuteInstr
case X86::VMOVSDrr:
case X86::VMOVSSrr:{
// On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
- assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
+ if (Subtarget.hasSSE41()) {
+ unsigned Mask, Opc;
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
+ case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
+ case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
+ case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ }
- unsigned Mask, Opc;
- switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
- case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
- case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
- case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
}
+ // Convert to SHUFPD.
+ assert(MI.getOpcode() == X86::MOVSDrr &&
+ "Can only commute MOVSDrr without SSE4.1");
+
auto &WorkingMI = cloneIfNew(MI);
- WorkingMI.setDesc(get(Opc));
- WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+ WorkingMI.setDesc(get(X86::SHUFPDrri));
+ WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ case X86::SHUFPDrri: {
+ // Commute to MOVSD.
+ assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(X86::MOVSDrr));
+ WorkingMI.RemoveOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
@@ -1874,13 +1893,18 @@ bool X86InstrInfo::findCommutedOpIndices
}
return false;
}
- case X86::MOVSDrr:
case X86::MOVSSrr:
- case X86::VMOVSDrr:
- case X86::VMOVSSrr:
+ // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
+ // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
+ // AVX implies sse4.1.
if (Subtarget.hasSSE41())
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return false;
+ case X86::SHUFPDrri:
+ // We can commute this to MOVSD.
+ if (MI.getOperand(3).getImm() == 0x02)
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ return false;
case X86::MOVHLPSrr:
case X86::UNPCKHPDrr:
case X86::VMOVHLPSrr:
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Jul 7 23:52:43 2019
@@ -1951,12 +1951,14 @@ let Predicates = [UseSSE1] in {
/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
- X86FoldableSchedWrite sched, Domain d> {
+ X86FoldableSchedWrite sched, Domain d,
+ bit IsCommutable = 0> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+ let isCommutable = IsCommutable in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -1988,7 +1990,7 @@ let Constraints = "$src1 = $dst" in {
memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
+ memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll Sun Jul 7 23:52:43 2019
@@ -65,9 +65,7 @@ entry:
define <2 x double> @test_negative_zero_2(<2 x double> %A) {
; SSE2-LABEL: test_negative_zero_2:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movapd {{.*#+}} xmm1 = <u,-0.0E+0>
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_negative_zero_2:
Modified: llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll (original)
+++ llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll Sun Jul 7 23:52:43 2019
@@ -9,8 +9,7 @@
define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
; SSE2-LABEL: insert_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: insert_f64:
Modified: llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sdiv.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sdiv.ll Sun Jul 7 23:52:43 2019
@@ -1529,8 +1529,7 @@ define <2 x i64> @combine_vec_sdiv_by_po
; SSE2-NEXT: psrlq $2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
@@ -1616,24 +1615,23 @@ define <4 x i64> @combine_vec_sdiv_by_po
; SSE2-NEXT: psrlq $2, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $61, %xmm0
-; SSE2-NEXT: psrlq $60, %xmm3
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
-; SSE2-NEXT: paddq %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $3, %xmm0
-; SSE2-NEXT: psrlq $4, %xmm3
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
-; SSE2-NEXT: movapd {{.*#+}} xmm0 = [1152921504606846976,576460752303423488]
-; SSE2-NEXT: xorpd %xmm0, %xmm3
-; SSE2-NEXT: psubq %xmm0, %xmm3
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm3, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: psrlq $61, %xmm3
+; SSE2-NEXT: psrlq $60, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
+; SSE2-NEXT: paddq %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: psrlq $3, %xmm1
+; SSE2-NEXT: psrlq $4, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; SSE2-NEXT: movapd {{.*#+}} xmm1 = [1152921504606846976,576460752303423488]
+; SSE2-NEXT: xorpd %xmm1, %xmm2
+; SSE2-NEXT: psubq %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
@@ -1745,29 +1743,28 @@ define <4 x i64> @combine_vec_sdiv_by_po
define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; SSE2-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: psrlq $62, %xmm0
-; SSE2-NEXT: paddq %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: psrlq $62, %xmm4
+; SSE2-NEXT: paddq %xmm0, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: psrad $2, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
-; SSE2-NEXT: psrlq $2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; SSE2-NEXT: movdqa %xmm4, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: psrlq $62, %xmm2
-; SSE2-NEXT: paddq %xmm4, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: psrlq $2, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: psrlq $62, %xmm4
+; SSE2-NEXT: paddq %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: psrad $2, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
-; SSE2-NEXT: psrlq $2, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm4[0],xmm2[1]
+; SSE2-NEXT: psrlq $2, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm4[1]
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Sun Jul 7 23:52:43 2019
@@ -657,46 +657,47 @@ define <16 x i8> @test14(<16 x i8> %x, <
;
; SSSE3-LABEL: test14:
; SSSE3: # %bb.0: # %vector.ph
-; SSSE3-NEXT: movdqa %xmm0, %xmm5
-; SSSE3-NEXT: pxor %xmm0, %xmm0
-; SSSE3-NEXT: movdqa %xmm5, %xmm7
-; SSSE3-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSSE3-NEXT: movdqa %xmm7, %xmm8
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
-; SSSE3-NEXT: movdqa %xmm5, %xmm10
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm0[0],xmm10[1],xmm0[1],xmm10[2],xmm0[2],xmm10[3],xmm0[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT: pxor %xmm7, %xmm7
+; SSSE3-NEXT: movdqa %xmm0, %xmm11
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm11 = xmm11[8],xmm7[8],xmm11[9],xmm7[9],xmm11[10],xmm7[10],xmm11[11],xmm7[11],xmm11[12],xmm7[12],xmm11[13],xmm7[13],xmm11[14],xmm7[14],xmm11[15],xmm7[15]
+; SSSE3-NEXT: movdqa %xmm11, %xmm8
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm11 = xmm11[4],xmm7[4],xmm11[5],xmm7[5],xmm11[6],xmm7[6],xmm11[7],xmm7[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
+; SSSE3-NEXT: movdqa %xmm0, %xmm10
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1],xmm10[2],xmm7[2],xmm10[3],xmm7[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
; SSSE3-NEXT: movdqa %xmm2, %xmm9
-; SSSE3-NEXT: pxor %xmm0, %xmm9
-; SSSE3-NEXT: psubd %xmm5, %xmm2
-; SSSE3-NEXT: por %xmm0, %xmm5
+; SSSE3-NEXT: pxor %xmm7, %xmm9
+; SSSE3-NEXT: psubd %xmm0, %xmm2
+; SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSSE3-NEXT: por %xmm7, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5
; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm9, %xmm5
; SSSE3-NEXT: movdqa %xmm1, %xmm6
-; SSSE3-NEXT: pxor %xmm0, %xmm6
+; SSSE3-NEXT: pxor %xmm7, %xmm6
; SSSE3-NEXT: psubd %xmm10, %xmm1
-; SSSE3-NEXT: por %xmm0, %xmm10
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm10
-; SSSE3-NEXT: pshufb %xmm9, %xmm10
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm5[0],xmm10[1],xmm5[1]
+; SSSE3-NEXT: movdqa %xmm10, %xmm0
+; SSSE3-NEXT: por %xmm7, %xmm0
+; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0
+; SSSE3-NEXT: pshufb %xmm9, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
; SSSE3-NEXT: movdqa %xmm4, %xmm5
-; SSSE3-NEXT: pxor %xmm0, %xmm5
-; SSSE3-NEXT: psubd %xmm7, %xmm4
-; SSSE3-NEXT: por %xmm0, %xmm7
-; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
+; SSSE3-NEXT: pxor %xmm7, %xmm5
+; SSSE3-NEXT: psubd %xmm11, %xmm4
+; SSSE3-NEXT: por %xmm7, %xmm11
+; SSSE3-NEXT: pcmpgtd %xmm5, %xmm11
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT: pshufb %xmm5, %xmm7
+; SSSE3-NEXT: pshufb %xmm5, %xmm11
; SSSE3-NEXT: movdqa %xmm3, %xmm6
-; SSSE3-NEXT: pxor %xmm0, %xmm6
-; SSSE3-NEXT: por %xmm8, %xmm0
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0
-; SSSE3-NEXT: pshufb %xmm5, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm10[0],xmm0[1]
+; SSSE3-NEXT: pxor %xmm7, %xmm6
+; SSSE3-NEXT: por %xmm8, %xmm7
+; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
+; SSSE3-NEXT: pshufb %xmm5, %xmm7
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm11[0],xmm7[1],xmm11[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm7[1]
; SSSE3-NEXT: psubd %xmm8, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSSE3-NEXT: pand %xmm5, %xmm4
Modified: llvm/trunk/test/CodeGen/X86/sdiv-exact.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sdiv-exact.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sdiv-exact.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sdiv-exact.ll Sun Jul 7 23:52:43 2019
@@ -82,17 +82,16 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrad $3, %xmm1
-; X86-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X86-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; X86-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,3264175145,3264175145]
-; X86-NEXT: movapd %xmm0, %xmm1
-; X86-NEXT: pmuludq %xmm2, %xmm1
-; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-NEXT: movapd %xmm1, %xmm0
+; X86-NEXT: pmuludq %xmm2, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; X86-NEXT: pmuludq %xmm0, %xmm2
-; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-NEXT: movdqa %xmm1, %xmm0
+; X86-NEXT: pmuludq %xmm1, %xmm2
+; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
;
; X64-LABEL: test5:
Modified: llvm/trunk/test/CodeGen/X86/sse2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2.ll Sun Jul 7 23:52:43 2019
@@ -29,9 +29,8 @@ define void @test1(<2 x double>* %r, <2
;
; X64-SSE-LABEL: test1:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movapd (%rsi), %xmm1
-; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X64-SSE-NEXT: movapd %xmm1, (%rdi)
+; X64-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
+; X64-SSE-NEXT: movapd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test1:
Modified: llvm/trunk/test/CodeGen/X86/vector-blend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-blend.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-blend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-blend.ll Sun Jul 7 23:52:43 2019
@@ -149,14 +149,12 @@ entry:
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
@@ -176,14 +174,12 @@ entry:
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
@@ -340,20 +336,16 @@ define <8 x double> @vsel_double8(<8 x d
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSE2-NEXT: movapd %xmm4, %xmm0
-; SSE2-NEXT: movapd %xmm6, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSSE3-NEXT: movapd %xmm4, %xmm0
-; SSSE3-NEXT: movapd %xmm6, %xmm2
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double8:
@@ -379,20 +371,16 @@ define <8 x i64> @vsel_i648(<8 x i64> %v
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSE2-NEXT: movapd %xmm4, %xmm0
-; SSE2-NEXT: movapd %xmm6, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
-; SSSE3-NEXT: movapd %xmm4, %xmm0
-; SSSE3-NEXT: movapd %xmm6, %xmm2
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i648:
@@ -416,18 +404,14 @@ entry:
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: movapd %xmm3, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSSE3-NEXT: movapd %xmm2, %xmm0
-; SSSE3-NEXT: movapd %xmm3, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
@@ -529,15 +513,13 @@ define <4 x double> @constant_blendvpd_a
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSE2-NEXT: movapd %xmm3, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm2, %xmm0
-; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSSE3-NEXT: movapd %xmm3, %xmm1
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
@@ -713,14 +695,12 @@ entry:
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSSE3-NEXT: movapd %xmm2, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
Modified: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll Sun Jul 7 23:52:43 2019
@@ -145,20 +145,21 @@ define <2 x i32> @var_shift_v2i32(<2 x i
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT: movdqa %xmm3, %xmm0
-; X32-SSE-NEXT: psrlq %xmm1, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrlq %xmm1, %xmm3
; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; X32-SSE-NEXT: xorps %xmm5, %xmm5
; X32-SSE-NEXT: movss {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3]
-; X32-SSE-NEXT: psrlq %xmm5, %xmm3
-; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
-; X32-SSE-NEXT: movdqa %xmm2, %xmm0
; X32-SSE-NEXT: psrlq %xmm5, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: psrlq %xmm5, %xmm3
; X32-SSE-NEXT: psrlq %xmm1, %xmm2
-; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; X32-SSE-NEXT: xorpd %xmm3, %xmm0
-; X32-SSE-NEXT: psubq %xmm3, %xmm0
+; X32-SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm3[1]
+; X32-SSE-NEXT: xorpd %xmm0, %xmm2
+; X32-SSE-NEXT: psubq %xmm0, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm0
; X32-SSE-NEXT: retl
%shift = ashr <2 x i32> %a, %b
ret <2 x i32> %shift
@@ -1057,21 +1058,22 @@ define <2 x i32> @splatvar_shift_v2i32(<
; X32-SSE-NEXT: psrad $31, %xmm0
; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,0,4294967295,0]
-; X32-SSE-NEXT: pand %xmm1, %xmm3
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT: movdqa %xmm4, %xmm0
-; X32-SSE-NEXT: psrlq %xmm3, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,0,4294967295,0]
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: psrlq %xmm0, %xmm4
; X32-SSE-NEXT: xorps %xmm5, %xmm5
; X32-SSE-NEXT: movss {{.*#+}} xmm5 = xmm1[0],xmm5[1,2,3]
-; X32-SSE-NEXT: psrlq %xmm5, %xmm4
-; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
+; X32-SSE-NEXT: psrlq %xmm5, %xmm3
+; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
+; X32-SSE-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE-NEXT: psrlq %xmm5, %xmm1
+; X32-SSE-NEXT: psrlq %xmm0, %xmm2
+; X32-SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; X32-SSE-NEXT: xorpd %xmm3, %xmm2
+; X32-SSE-NEXT: psubq %xmm3, %xmm2
; X32-SSE-NEXT: movdqa %xmm2, %xmm0
-; X32-SSE-NEXT: psrlq %xmm5, %xmm0
-; X32-SSE-NEXT: psrlq %xmm3, %xmm2
-; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; X32-SSE-NEXT: xorpd %xmm4, %xmm0
-; X32-SSE-NEXT: psubq %xmm4, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
%shift = ashr <2 x i32> %a, %splat
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Sun Jul 7 23:52:43 2019
@@ -222,20 +222,17 @@ define <2 x double> @shuffle_v2f64_33(<2
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
@@ -351,20 +348,17 @@ define <2 x i64> @shuffle_v2i64_02_copy(
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
@@ -382,20 +376,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # %bb.0:
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # %bb.0:
-; SSE3-NEXT: movapd %xmm2, %xmm0
-; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movapd %xmm2, %xmm0
-; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
@@ -1085,20 +1079,17 @@ define <2 x i64> @insert_mem_hi_v2i64(i6
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE2-LABEL: insert_reg_lo_v2f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2f64:
; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2f64:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2f64:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Sun Jul 7 23:52:43 2019
@@ -2255,20 +2255,17 @@ define <4 x i32> @insert_mem_hi_v4i32(<2
define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
; SSE2-LABEL: insert_reg_lo_v4f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v4f32:
; SSE3: # %bb.0:
-; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v4f32:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v4f32:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Sun Jul 7 23:52:43 2019
@@ -1260,9 +1260,9 @@ define <8 x i16> @shuffle_v8i16_443aXXXX
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_032dXXXX:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: retq
@@ -1459,9 +1459,9 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dcde3:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3,2,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sun Jul 7 23:52:43 2019
@@ -43,8 +43,7 @@ define <16 x i8> @combine_vpshufb_as_mov
define <2 x double> @combine_pshufb_as_movsd(<2 x double> %a0, <2 x double> %a1) {
; SSSE3-LABEL: combine_pshufb_as_movsd:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_pshufb_as_movsd:
@@ -669,8 +668,7 @@ declare <16 x i8> @llvm.x86.sse2.packusw
define <16 x i8> @combine_pshufb_pshufb_or_as_blend(<16 x i8> %a0, <16 x i8> %a1) {
; SSSE3-LABEL: combine_pshufb_pshufb_or_as_blend:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_pshufb_pshufb_or_as_blend:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll Sun Jul 7 23:52:43 2019
@@ -2132,14 +2132,12 @@ define <4 x float> @combine_undef_input_
define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test5:
@@ -2316,14 +2314,12 @@ define <4 x float> @combine_undef_input_
define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test15:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test15:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test15:
Modified: llvm/trunk/test/CodeGen/X86/vselect-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-2.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-2.ll Sun Jul 7 23:52:43 2019
@@ -7,8 +7,7 @@
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test1:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test1:
@@ -46,8 +45,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4
define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
; SSE2-LABEL: test3:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test3:
Modified: llvm/trunk/test/CodeGen/X86/vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect.ll Sun Jul 7 23:52:43 2019
@@ -30,8 +30,7 @@ define <4 x float> @test1(<4 x float> %a
define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test2:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test2:
@@ -107,8 +106,7 @@ define <8 x i16> @test6(<8 x i16> %a, <8
define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test7:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test7:
@@ -392,8 +390,7 @@ define <4 x i32> @test23(<4 x i32> %a, <
define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test24:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test24:
@@ -412,8 +409,7 @@ define <2 x double> @test24(<2 x double>
define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test25:
; SSE2: # %bb.0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test25:
Modified: llvm/trunk/test/CodeGen/X86/x86-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-shifts.ll?rev=365292&r1=365291&r2=365292&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-shifts.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-shifts.ll Sun Jul 7 23:52:43 2019
@@ -223,9 +223,9 @@ define <2 x i64> @shr2_nosplat(<2 x i64>
; X32-NEXT: psrlq $8, %xmm2
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlq $1, %xmm1
-; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X32-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; X32-NEXT: xorpd %xmm0, %xmm1
+; X32-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; X32-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; X32-NEXT: xorpd %xmm2, %xmm1
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: retl
;
@@ -235,9 +235,9 @@ define <2 x i64> @shr2_nosplat(<2 x i64>
; X64-NEXT: psrlq $8, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $1, %xmm1
-; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X64-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; X64-NEXT: xorpd %xmm0, %xmm1
+; X64-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
+; X64-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
+; X64-NEXT: xorpd %xmm2, %xmm1
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
entry:
More information about the llvm-commits mailing list