[llvm] r322524 - [X86][SSE] Add custom execution domain fixing for BLENDPD/BLENDPS/PBLENDD/PBLENDW (PR34873)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 15 14:18:46 PST 2018
Author: rksimon
Date: Mon Jan 15 14:18:45 2018
New Revision: 322524
URL: http://llvm.org/viewvc/llvm-project?rev=322524&view=rev
Log:
[X86][SSE] Add custom execution domain fixing for BLENDPD/BLENDPS/PBLENDD/PBLENDW (PR34873)
Add support for custom execution domain fixing and implement it for BLENDPD/BLENDPS/PBLENDD/PBLENDW.
Differential Revision: https://reviews.llvm.org/D42042
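
The core idea of the change: a blend immediate can be rescaled to a different element width only when every wider lane takes all of its elements from the same source, and that check decides which execution domains a given blend may legally move to. Below is a minimal standalone C++ sketch of that rescaling test, mirroring the AdjustBlendMask helper added in the patch; the function name, driver values, and main() here are illustrative assumptions, not the in-tree API.

// A minimal standalone sketch of the blend-mask rescaling check performed
// by the patch's AdjustBlendMask helper; the driver values below are
// illustrative only.
#include <cassert>
#include <cstdio>

// Try to re-express OldMask (one bit per lane, OldWidth lanes) with
// NewWidth lanes. Returns false if some wider lane would have to mix
// elements from both sources.
static bool adjustBlendMask(unsigned OldMask, unsigned OldWidth,
                            unsigned NewWidth, unsigned *NewMaskOut) {
  unsigned NewMask = 0;
  if (OldWidth % NewWidth == 0) {
    // Narrowing, e.g. a BLENDPS immediate -> a BLENDPD immediate: each
    // group of OldWidth/NewWidth bits must be all zeros or all ones.
    unsigned Scale = OldWidth / NewWidth;
    unsigned SubMask = (1u << Scale) - 1;
    for (unsigned i = 0; i != NewWidth; ++i) {
      unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
      if (Sub == SubMask)
        NewMask |= 1u << i;
      else if (Sub != 0)
        return false; // this lane mixes both sources
    }
  } else {
    // Widening, e.g. a BLENDPD immediate -> a BLENDPS immediate: replicate
    // each bit across the narrower lanes it covers.
    assert(NewWidth % OldWidth == 0 && "widths must be multiples");
    unsigned Scale = NewWidth / OldWidth;
    unsigned SubMask = (1u << Scale) - 1;
    for (unsigned i = 0; i != OldWidth; ++i)
      if (OldMask & (1u << i))
        NewMask |= SubMask << (i * Scale);
  }
  if (NewMaskOut)
    *NewMaskOut = NewMask;
  return true;
}

int main() {
  unsigned M;
  // vblendpd $0x2 (take the high f64 from the second source) widens to
  // vblendps $0xc, so the PackedSingle domain is reachable.
  if (adjustBlendMask(0x2, 2, 4, &M))
    printf("widened mask: 0x%x\n", M); // prints 0xc
  // vblendps $0x6 selects from both sources within one 64-bit lane, so it
  // cannot be narrowed to a vblendpd immediate.
  printf("narrowable: %d\n", adjustBlendMask(0x6, 4, 2, &M)); // prints 0
  return 0;
}

In the patch itself, getExecutionDomainCustom uses this test to report which domains are legal for a given blend, and setExecutionDomainCustom rewrites the opcode and immediate accordingly.
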
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.h
llvm/trunk/test/CodeGen/X86/avx-cast.ll
llvm/trunk/test/CodeGen/X86/avx-insertelt.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
llvm/trunk/test/CodeGen/X86/combine-and.ll
llvm/trunk/test/CodeGen/X86/combine-or.ll
llvm/trunk/test/CodeGen/X86/combine-sra.ll
llvm/trunk/test/CodeGen/X86/commute-blend-avx2.ll
llvm/trunk/test/CodeGen/X86/commute-blend-sse41.ll
llvm/trunk/test/CodeGen/X86/commuted-blend-mask.ll
llvm/trunk/test/CodeGen/X86/cvtv2f32.ll
llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll
llvm/trunk/test/CodeGen/X86/insertelement-ones.ll
llvm/trunk/test/CodeGen/X86/insertelement-zero.ll
llvm/trunk/test/CodeGen/X86/masked_memop.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll
llvm/trunk/test/CodeGen/X86/oddshuffles.ll
llvm/trunk/test/CodeGen/X86/pr31956.ll
llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll
llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/sse41.ll
llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
llvm/trunk/test/CodeGen/X86/vector-blend.ll
llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/trunk/test/CodeGen/X86/vselect-2.ll
llvm/trunk/test/CodeGen/X86/vselect.ll
llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jan 15 14:18:45 2018
@@ -9694,8 +9694,6 @@ static const uint16_t ReplaceableInstrsA
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
{ X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
{ X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
- { X86::VBLENDPSrri, X86::VBLENDPSrri, X86::VPBLENDDrri },
- { X86::VBLENDPSrmi, X86::VBLENDPSrmi, X86::VPBLENDDrmi },
{ X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri },
{ X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
{ X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
@@ -9949,6 +9947,24 @@ static const uint16_t ReplaceableInstrsA
X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
};
+// NOTE: These should only be used by the custom domain methods.
+static const uint16_t ReplaceableCustomInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi },
+ { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri },
+ { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi },
+ { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri },
+ { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi },
+ { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri },
+};
+static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi },
+ { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri },
+ { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi },
+ { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
+};
+
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
@@ -9969,13 +9985,177 @@ static const uint16_t *lookupAVX512(unsi
return nullptr;
}
+// Helper to attempt to widen/narrow blend masks.
+static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth,
+ unsigned NewWidth, unsigned *pNewMask = nullptr) {
+ assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
+ "Illegal blend mask scale");
+ unsigned NewMask = 0;
+
+ if ((OldWidth % NewWidth) == 0) {
+ unsigned Scale = OldWidth / NewWidth;
+ unsigned SubMask = (1u << Scale) - 1;
+ for (unsigned i = 0; i != NewWidth; ++i) {
+ unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
+ if (Sub == SubMask)
+ NewMask |= (1u << i);
+ else if (Sub != 0x0)
+ return false;
+ }
+ } else {
+ unsigned Scale = NewWidth / OldWidth;
+ unsigned SubMask = (1u << Scale) - 1;
+ for (unsigned i = 0; i != OldWidth; ++i) {
+ if (OldMask & (1 << i)) {
+ NewMask |= (SubMask << (i * Scale));
+ }
+ }
+ }
+
+ if (pNewMask)
+ *pNewMask = NewMask;
+ return true;
+}
+
+uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOperands = MI.getNumOperands();
+
+ auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
+ uint16_t validDomains = 0;
+ if (MI.getOperand(NumOperands - 1).isImm()) {
+ unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
+ if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4))
+ validDomains |= 0x2; // PackedSingle
+ if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2))
+ validDomains |= 0x4; // PackedDouble
+ if (!Is256 || Subtarget.hasAVX2())
+ validDomains |= 0x8; // PackedInt
+ }
+ return validDomains;
+ };
+
+ switch (Opcode) {
+ case X86::BLENDPDrmi:
+ case X86::BLENDPDrri:
+ case X86::VBLENDPDrmi:
+ case X86::VBLENDPDrri:
+ return GetBlendDomains(2, false);
+ case X86::VBLENDPDYrmi:
+ case X86::VBLENDPDYrri:
+ return GetBlendDomains(4, true);
+ case X86::BLENDPSrmi:
+ case X86::BLENDPSrri:
+ case X86::VBLENDPSrmi:
+ case X86::VBLENDPSrri:
+ case X86::VPBLENDDrmi:
+ case X86::VPBLENDDrri:
+ return GetBlendDomains(4, false);
+ case X86::VBLENDPSYrmi:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDYrmi:
+ case X86::VPBLENDDYrri:
+ return GetBlendDomains(8, true);
+ case X86::PBLENDWrmi:
+ case X86::PBLENDWrri:
+ case X86::VPBLENDWrmi:
+ case X86::VPBLENDWrri:
+ // Treat VPBLENDWY as a 128-bit vector as it repeats the lo/hi masks.
+ case X86::VPBLENDWYrmi:
+ case X86::VPBLENDWYrri:
+ return GetBlendDomains(8, false);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
+ unsigned Domain) const {
+ assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
+ uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+
+ unsigned Opcode = MI.getOpcode();
+ unsigned NumOperands = MI.getNumOperands();
+
+ auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
+ if (MI.getOperand(NumOperands - 1).isImm()) {
+ unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
+ Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
+ unsigned NewImm = Imm;
+
+ const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
+ if (!table)
+ table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+
+ if (Domain == 1) { // PackedSingle
+ AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
+ } else if (Domain == 2) { // PackedDouble
+ AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
+ } else if (Domain == 3) { // PackedInt
+ if (Subtarget.hasAVX2()) {
+ // If we are already VPBLENDW use that, else use VPBLENDD.
+ if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
+ table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+ AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
+ }
+ } else {
+ assert(!Is256 && "128-bit vector expected");
+ AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
+ }
+ }
+
+ assert(table && table[Domain - 1] && "Unknown domain op");
+ MI.setDesc(get(table[Domain - 1]));
+ MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
+ }
+ return true;
+ };
+
+ switch (Opcode) {
+ case X86::BLENDPDrmi:
+ case X86::BLENDPDrri:
+ case X86::VBLENDPDrmi:
+ case X86::VBLENDPDrri:
+ return SetBlendDomain(2, false);
+ case X86::VBLENDPDYrmi:
+ case X86::VBLENDPDYrri:
+ return SetBlendDomain(4, true);
+ case X86::BLENDPSrmi:
+ case X86::BLENDPSrri:
+ case X86::VBLENDPSrmi:
+ case X86::VBLENDPSrri:
+ case X86::VPBLENDDrmi:
+ case X86::VPBLENDDrri:
+ return SetBlendDomain(4, false);
+ case X86::VBLENDPSYrmi:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDYrmi:
+ case X86::VPBLENDDYrri:
+ return SetBlendDomain(8, true);
+ case X86::PBLENDWrmi:
+ case X86::PBLENDWrri:
+ case X86::VPBLENDWrmi:
+ case X86::VPBLENDWrri:
+ return SetBlendDomain(8, false);
+ case X86::VPBLENDWYrmi:
+ case X86::VPBLENDWYrri:
+ return SetBlendDomain(16, true);
+ }
+ return false;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
unsigned opcode = MI.getOpcode();
uint16_t validDomains = 0;
if (domain) {
- if (lookup(MI.getOpcode(), domain, ReplaceableInstrs)) {
+ // Attempt to match for custom instructions.
+ if (validDomains = getExecutionDomainCustom(MI)) {
+ return std::make_pair(domain, validDomains);
+ }
+
+ if (lookup(opcode, domain, ReplaceableInstrs)) {
validDomains = 0xe;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
@@ -10007,6 +10187,11 @@ void X86InstrInfo::setExecutionDomain(Ma
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
+
+ // Attempt to match for custom instructions.
+ if (setExecutionDomainCustom(MI, Domain))
+ return;
+
const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
if (!table) { // try the other table
assert((Subtarget.hasAVX2() || Domain < 3) &&
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Mon Jan 15 14:18:45 2018
@@ -490,8 +490,12 @@ public:
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr &MI) const override;
+ uint16_t getExecutionDomainCustom(const MachineInstr &MI) const;
+
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override;
+ bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const;
+
unsigned
getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
Modified: llvm/trunk/test/CodeGen/X86/avx-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-cast.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-cast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-cast.ll Mon Jan 15 14:18:45 2018
@@ -21,8 +21,8 @@ define <4 x double> @castB(<2 x double>
; AVX-LABEL: castB:
; AVX: ## %bb.0:
; AVX-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX-NEXT: retq
%shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
ret <4 x double> %shuffle.i
@@ -31,19 +31,12 @@ define <4 x double> @castB(<2 x double>
; AVX2 is needed for integer types.
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
-; AVX1-LABEL: castC:
-; AVX1: ## %bb.0:
-; AVX1-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: castC:
-; AVX2: ## %bb.0:
-; AVX2-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: retq
+; AVX-LABEL: castC:
+; AVX: ## %bb.0:
+; AVX-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT: retq
%shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
ret <4 x i64> %shuffle.i
}
Modified: llvm/trunk/test/CodeGen/X86/avx-insertelt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-insertelt.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-insertelt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-insertelt.ll Mon Jan 15 14:18:45 2018
@@ -16,7 +16,7 @@ define <4 x double> @insert_f64(<4 x dou
; ALL-LABEL: insert_f64:
; ALL: # %bb.0:
; ALL-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; ALL-NEXT: retq
%i0 = insertelement <4 x double> %y, double %f, i32 0
ret <4 x double> %i0
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Mon Jan 15 14:18:45 2018
@@ -141,12 +141,12 @@ define <8 x float> @test_mm256_andnot_ps
define <4 x double> @test_mm256_blend_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_blend_pd:
; X32: # %bb.0:
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_blend_pd:
; X64: # %bb.0:
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; X64-NEXT: retq
%res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
ret <4 x double> %res
@@ -1044,13 +1044,13 @@ define <4 x double> @test_mm256_insertf1
; X32-LABEL: test_mm256_insertf128_pd:
; X32: # %bb.0:
; X32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_insertf128_pd:
; X64: # %bb.0:
; X64-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-NEXT: retq
%ext = shufflevector <2 x double> %a1, <2 x double> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%res = shufflevector <4 x double> %a0, <4 x double> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -1076,13 +1076,13 @@ define <4 x i64> @test_mm256_insertf128_
; X32-LABEL: test_mm256_insertf128_si256:
; X32: # %bb.0:
; X32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_insertf128_si256:
; X64: # %bb.0:
; X64-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-NEXT: retq
%ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll Mon Jan 15 14:18:45 2018
@@ -40,7 +40,7 @@ define <8 x i32> @test_x86_avx_vinsertf1
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
ret <8 x i32> %res
@@ -133,7 +133,7 @@ declare <8 x float> @llvm.x86.avx.vbroad
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_blend_pd_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; CHECK-NEXT: ret{{[l|q]}}
%res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@@ -188,7 +188,7 @@ declare <2 x i64> @llvm.x86.sse2.psrl.dq
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK: # %bb.0:
-; CHECK-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: ret{{[l|q]}}
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
Modified: llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll Mon Jan 15 14:18:45 2018
@@ -37,7 +37,7 @@ entry:
define <8 x float> @shuffle_v8f32_0123cdef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_0123cdef:
; ALL: # %bb.0: # %entry
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
@@ -380,8 +380,8 @@ define <4 x double> @shuffle_v4f64_zz01_
define <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz23:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
ret <4 x double> %s
@@ -389,8 +389,8 @@ define <4 x double> @shuffle_v4f64_zz23(
define <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz23_optsize:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
ret <4 x double> %s
@@ -416,8 +416,8 @@ define <4 x double> @shuffle_v4f64_zz45_
define <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz67:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x double> %s
@@ -425,8 +425,8 @@ define <4 x double> @shuffle_v4f64_zz67(
define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz67_optsize:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x double> %s
@@ -435,8 +435,8 @@ define <4 x double> @shuffle_v4f64_zz67_
define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_01zz:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %s
@@ -444,8 +444,8 @@ define <4 x double> @shuffle_v4f64_01zz(
define <4 x double> @shuffle_v4f64_01zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_01zz_optsize:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %s
@@ -471,8 +471,8 @@ define <4 x double> @shuffle_v4f64_23zz_
define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_45zz:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %s
@@ -480,8 +480,8 @@ define <4 x double> @shuffle_v4f64_45zz(
define <4 x double> @shuffle_v4f64_45zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_45zz_optsize:
; ALL: # %bb.0:
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %s
@@ -511,7 +511,7 @@ define <4 x i64> @shuffle_v4i64_67zz(<4
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_67zz:
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Mon Jan 15 14:18:45 2018
@@ -1112,15 +1112,15 @@ define <4 x i32> @test_masked_z_8xi32_to
%res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
ret <4 x i32> %res
}
-define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) {
-; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,1]
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 5, i32 3, i32 2, i32 5>
+define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) {
+; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,1]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 5, i32 3, i32 2, i32 5>
ret <4 x i32> %res
}
define <4 x i32> @test_masked_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
@@ -3084,15 +3084,15 @@ define <4 x float> @test_masked_z_8xfloa
ret <4 x float> %res
}
-define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mem_mask1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm2
-; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = xmm2[2,3,3,2]
+define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mem_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %ymm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = xmm2[2,3,3,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@@ -3102,15 +3102,15 @@ define <4 x float> @test_masked_8xfloat_
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
-; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,3,3,2]
+define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %ymm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,3,3,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@@ -3398,16 +3398,16 @@ define <4 x float> @test_masked_z_16xflo
%res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
ret <4 x float> %res
}
-define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
-; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
+; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 8, i32 6, i32 10, i32 6>
@@ -3416,16 +3416,16 @@ define <4 x float> @test_masked_16xfloat
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
-; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
+define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 8, i32 6, i32 10, i32 6>
@@ -3478,17 +3478,17 @@ define <4 x float> @test_16xfloat_to_4xf
%res = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 10, i32 2, i32 11, i32 6>
ret <4 x float> %res
}
-define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7]
-; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm3
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
-; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
+define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7]
+; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
+; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 10, i32 2, i32 11, i32 6>
@@ -3497,17 +3497,17 @@ define <4 x float> @test_masked_16xfloat
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
-; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm2
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
-; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
+define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 10, i32 2, i32 11, i32 6>
@@ -3700,18 +3700,18 @@ define <4 x float> @test_16xfloat_to_4xf
%res = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 14, i32 6, i32 7, i32 11>
ret <4 x float> %res
}
-define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
-; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,2,3,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm2
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
-; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1}
+define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm2
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[0,2,3,3]
+; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2
+; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[3,1,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@@ -3721,18 +3721,18 @@ define <4 x float> @test_masked_16xfloat
ret <4 x float> %res
}
-define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %mask) {
-; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
-; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,3,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
-; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z}
+define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %mask) {
+; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,3]
+; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
+; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
+; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Mon Jan 15 14:18:45 2018
@@ -3256,8 +3256,8 @@ declare <8 x float> @llvm.x86.avx512.mas
define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
-; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
+; CHECK-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
+; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovaps %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
; CHECK-NEXT: vmovaps %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
Modified: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll Mon Jan 15 14:18:45 2018
@@ -72,7 +72,7 @@ define <2 x double> @test_negative_zero_
;
; SSE41-LABEL: test_negative_zero_2:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
; SSE41-NEXT: retq
entry:
%0 = extractelement <2 x double> %A, i32 0
Modified: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll Mon Jan 15 14:18:45 2018
@@ -16,21 +16,15 @@ define <2 x i64> @_clearupper2xi64a(<2 x
;
; SSE42-LABEL: _clearupper2xi64a:
; SSE42: # %bb.0:
-; SSE42-NEXT: pxor %xmm1, %xmm1
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE42-NEXT: xorps %xmm1, %xmm1
+; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: _clearupper2xi64a:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper2xi64a:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper2xi64a:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
%x0 = extractelement <2 x i64> %0, i32 0
%x1 = extractelement <2 x i64> %0, i32 1
%trunc0 = trunc i64 %x0 to i32
@@ -52,9 +46,9 @@ define <4 x i64> @_clearupper4xi64a(<4 x
;
; SSE42-LABEL: _clearupper4xi64a:
; SSE42: # %bb.0:
-; SSE42-NEXT: pxor %xmm2, %xmm2
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; SSE42-NEXT: xorps %xmm2, %xmm2
+; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi64a:
@@ -673,21 +667,15 @@ define <2 x i64> @_clearupper2xi64b(<2 x
;
; SSE42-LABEL: _clearupper2xi64b:
; SSE42: # %bb.0:
-; SSE42-NEXT: pxor %xmm1, %xmm1
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE42-NEXT: xorps %xmm1, %xmm1
+; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: _clearupper2xi64b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper2xi64b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper2xi64b:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
%x32 = bitcast <2 x i64> %0 to <4 x i32>
%r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
%r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
@@ -705,9 +693,9 @@ define <4 x i64> @_clearupper4xi64b(<4 x
;
; SSE42-LABEL: _clearupper4xi64b:
; SSE42: # %bb.0:
-; SSE42-NEXT: pxor %xmm2, %xmm2
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; SSE42-NEXT: xorps %xmm2, %xmm2
+; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi64b:
@@ -1639,21 +1627,15 @@ define <2 x i64> @_clearupper2xi64c(<2 x
;
; SSE42-LABEL: _clearupper2xi64c:
; SSE42: # %bb.0:
-; SSE42-NEXT: pxor %xmm1, %xmm1
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE42-NEXT: xorps %xmm1, %xmm1
+; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT: retq
;
-; AVX1-LABEL: _clearupper2xi64c:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper2xi64c:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper2xi64c:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
%r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
ret <2 x i64> %r
}
@@ -1668,9 +1650,9 @@ define <4 x i64> @_clearupper4xi64c(<4 x
;
; SSE42-LABEL: _clearupper4xi64c:
; SSE42: # %bb.0:
-; SSE42-NEXT: pxor %xmm2, %xmm2
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; SSE42-NEXT: xorps %xmm2, %xmm2
+; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi64c:
Modified: llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll (original)
+++ llvm/trunk/test/CodeGen/X86/coalesce_commute_movsd.ll Mon Jan 15 14:18:45 2018
@@ -15,12 +15,12 @@ define <2 x double> @insert_f64(double %
;
; SSE41-LABEL: insert_f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
;
; AVX512-LABEL: insert_f64:
Modified: llvm/trunk/test/CodeGen/X86/combine-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-and.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-and.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-and.ll Mon Jan 15 14:18:45 2018
@@ -27,8 +27,8 @@ define <4 x i32> @and_self_vec(<4 x i32>
define <4 x i32> @test1(<4 x i32> %A) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
ret <4 x i32> %1
@@ -37,8 +37,8 @@ define <4 x i32> @test1(<4 x i32> %A) {
define <4 x i32> @test2(<4 x i32> %A) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
ret <4 x i32> %1
@@ -47,8 +47,8 @@ define <4 x i32> @test2(<4 x i32> %A) {
define <4 x i32> @test3(<4 x i32> %A) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
ret <4 x i32> %1
@@ -57,8 +57,8 @@ define <4 x i32> @test3(<4 x i32> %A) {
define <4 x i32> @test4(<4 x i32> %A) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
ret <4 x i32> %1
@@ -67,8 +67,8 @@ define <4 x i32> @test4(<4 x i32> %A) {
define <4 x i32> @test5(<4 x i32> %A) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
ret <4 x i32> %1
@@ -77,8 +77,8 @@ define <4 x i32> @test5(<4 x i32> %A) {
define <4 x i32> @test6(<4 x i32> %A) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
ret <4 x i32> %1
@@ -87,8 +87,8 @@ define <4 x i32> @test6(<4 x i32> %A) {
define <4 x i32> @test7(<4 x i32> %A) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
ret <4 x i32> %1
@@ -97,8 +97,8 @@ define <4 x i32> @test7(<4 x i32> %A) {
define <4 x i32> @test8(<4 x i32> %A) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
ret <4 x i32> %1
@@ -116,8 +116,8 @@ define <4 x i32> @test9(<4 x i32> %A) {
define <4 x i32> @test10(<4 x i32> %A) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
ret <4 x i32> %1
@@ -126,8 +126,8 @@ define <4 x i32> @test10(<4 x i32> %A) {
define <4 x i32> @test11(<4 x i32> %A) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %1
@@ -136,8 +136,8 @@ define <4 x i32> @test11(<4 x i32> %A) {
define <4 x i32> @test12(<4 x i32> %A) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
ret <4 x i32> %1
@@ -146,8 +146,8 @@ define <4 x i32> @test12(<4 x i32> %A) {
define <4 x i32> @test13(<4 x i32> %A) {
; CHECK-LABEL: test13:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
ret <4 x i32> %1
@@ -156,8 +156,8 @@ define <4 x i32> @test13(<4 x i32> %A) {
define <4 x i32> @test14(<4 x i32> %A) {
; CHECK-LABEL: test14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
ret <4 x i32> %1
@@ -166,7 +166,7 @@ define <4 x i32> @test14(<4 x i32> %A) {
define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test15:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
%2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
@@ -177,7 +177,7 @@ define <4 x i32> @test15(<4 x i32> %A, <
define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
%2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
@@ -188,7 +188,7 @@ define <4 x i32> @test16(<4 x i32> %A, <
define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test17:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
%2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
Modified: llvm/trunk/test/CodeGen/X86/combine-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-or.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-or.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-or.ll Mon Jan 15 14:18:45 2018
@@ -24,7 +24,7 @@ define <4 x i32> @or_self_vec(<4 x i32>
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
@@ -36,7 +36,7 @@ define <2 x i64> @test1(<2 x i64> %a, <2
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -48,7 +48,7 @@ define <4 x i32> @test2(<4 x i32> %a, <4
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
@@ -60,7 +60,7 @@ define <2 x i64> @test3(<2 x i64> %a, <2
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
@@ -72,7 +72,7 @@ define <4 x i32> @test4(<4 x i32> %a, <4
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
@@ -84,7 +84,7 @@ define <4 x i32> @test5(<4 x i32> %a, <4
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
@@ -96,7 +96,7 @@ define <4 x i32> @test6(<4 x i32> %a, <4
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
@@ -108,7 +108,7 @@ define <4 x i32> @test7(<4 x i32> %a, <4
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retq
%and1 = and <2 x i64> %a, <i64 -1, i64 0>
%and2 = and <2 x i64> %b, <i64 0, i64 -1>
@@ -120,7 +120,7 @@ define <2 x i64> @test8(<2 x i64> %a, <2
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
@@ -132,7 +132,7 @@ define <4 x i32> @test9(<4 x i32> %a, <4
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%and1 = and <2 x i64> %a, <i64 0, i64 -1>
%and2 = and <2 x i64> %b, <i64 -1, i64 0>
@@ -144,7 +144,7 @@ define <2 x i64> @test10(<2 x i64> %a, <
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
@@ -156,7 +156,7 @@ define <4 x i32> @test11(<4 x i32> %a, <
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
@@ -299,7 +299,7 @@ define <2 x i64> @test21(<2 x i64> %a, <
define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test22:
; CHECK: # %bb.0:
-; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%bc1 = bitcast <2 x double> %a0 to <2 x i64>
%bc2 = bitcast <2 x double> %a1 to <2 x i64>
@@ -329,7 +329,7 @@ define <4 x float> @test23(<4 x float> %
define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test24:
; CHECK: # %bb.0:
-; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%bc1 = bitcast <4 x float> %a0 to <2 x i64>
%bc2 = bitcast <4 x float> %a1 to <2 x i64>
@@ -362,7 +362,7 @@ define <4 x float> @test25(<4 x float> %
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -375,7 +375,7 @@ define <4 x i8> @test_crash(<4 x i8> %a,
define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2b:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -386,7 +386,7 @@ define <4 x i32> @test2b(<4 x i32> %a, <
define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2c:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
%shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
@@ -398,7 +398,7 @@ define <4 x i32> @test2c(<4 x i32> %a, <
define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2d:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
@@ -411,7 +411,7 @@ define <4 x i32> @test2d(<4 x i32> %a, <
define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2e:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -422,7 +422,7 @@ define <4 x i32> @test2e(<4 x i32> %a, <
define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2f:
; CHECK: # %bb.0:
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 1, i32 4, i32 4>
Modified: llvm/trunk/test/CodeGen/X86/combine-sra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sra.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sra.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sra.ll Mon Jan 15 14:18:45 2018
@@ -215,7 +215,7 @@ define <4 x i32> @combine_vec_ashr_trunc
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrad $2, %xmm1
-; SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: psrad $3, %xmm0
; SSE-NEXT: psrad $1, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
@@ -258,7 +258,7 @@ define <4 x i32> @combine_vec_ashr_trunc
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: psrad $2, %xmm1
-; SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: psrad $3, %xmm0
; SSE-NEXT: psrad $1, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
Modified: llvm/trunk/test/CodeGen/X86/commute-blend-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-blend-avx2.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-blend-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-blend-avx2.ll Mon Jan 15 14:18:45 2018
@@ -70,7 +70,7 @@ declare <8 x float> @llvm.x86.avx.blend.
define <2 x double> @commute_fold_vblendpd_128(<2 x double> %a, <2 x double>* %b) #0 {
; CHECK-LABEL: commute_fold_vblendpd_128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
; CHECK-NEXT: retq
%1 = load <2 x double>, <2 x double>* %b
%2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
@@ -81,7 +81,7 @@ declare <2 x double> @llvm.x86.sse41.ble
define <4 x double> @commute_fold_vblendpd_256(<4 x double> %a, <4 x double>* %b) #0 {
; CHECK-LABEL: commute_fold_vblendpd_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],mem[3]
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],mem[6,7]
; CHECK-NEXT: retq
%1 = load <4 x double>, <4 x double>* %b
%2 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %1, <4 x double> %a, i8 7)
Modified: llvm/trunk/test/CodeGen/X86/commute-blend-sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-blend-sse41.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-blend-sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-blend-sse41.ll Mon Jan 15 14:18:45 2018
@@ -26,7 +26,7 @@ declare <4 x float> @llvm.x86.sse41.blen
define <2 x double> @commute_fold_blendpd(<2 x double> %a, <2 x double>* %b) #0 {
; CHECK-LABEL: commute_fold_blendpd:
; CHECK: # %bb.0:
-; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
; CHECK-NEXT: retq
%1 = load <2 x double>, <2 x double>* %b
%2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
Modified: llvm/trunk/test/CodeGen/X86/commuted-blend-mask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commuted-blend-mask.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commuted-blend-mask.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commuted-blend-mask.ll Mon Jan 15 14:18:45 2018
@@ -10,5 +10,7 @@
define <4 x i32> @test(<4 x i32> %a, <4 x i32> %b) {
; CHECK: pblendw $63, %xmm1, %xmm0
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
- ret <4 x i32> %shuffle
+ ; add forces execution domain
+ %sum = add <4 x i32> %shuffle, %shuffle
+ ret <4 x i32> %sum
}
Modified: llvm/trunk/test/CodeGen/X86/cvtv2f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cvtv2f32.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cvtv2f32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cvtv2f32.ll Mon Jan 15 14:18:45 2018
@@ -72,10 +72,10 @@ define <2 x float> @uitofp_2i32_buildvec
define <2 x float> @uitofp_2i32_legalized(<2 x i32> %in, <2 x float> %v) {
; X32-LABEL: uitofp_2i32_legalized:
; X32: # %bb.0:
-; X32-NEXT: pxor %xmm2, %xmm2
-; X32-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X32-NEXT: movdqa {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15]
-; X32-NEXT: por %xmm0, %xmm2
+; X32-NEXT: xorps %xmm2, %xmm2
+; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; X32-NEXT: movaps {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15]
+; X32-NEXT: orps %xmm0, %xmm2
; X32-NEXT: subpd %xmm0, %xmm2
; X32-NEXT: cvtpd2ps %xmm2, %xmm0
; X32-NEXT: mulps %xmm1, %xmm0
@@ -83,10 +83,10 @@ define <2 x float> @uitofp_2i32_legalize
;
; X64-LABEL: uitofp_2i32_legalized:
; X64: # %bb.0:
-; X64-NEXT: pxor %xmm2, %xmm2
-; X64-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X64-NEXT: movdqa {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15]
-; X64-NEXT: por %xmm0, %xmm2
+; X64-NEXT: xorps %xmm2, %xmm2
+; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; X64-NEXT: movaps {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15]
+; X64-NEXT: orps %xmm0, %xmm2
; X64-NEXT: subpd %xmm0, %xmm2
; X64-NEXT: cvtpd2ps %xmm2, %xmm0
; X64-NEXT: mulps %xmm1, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insert-into-constant-vector.ll Mon Jan 15 14:18:45 2018
@@ -440,9 +440,9 @@ define <8 x double> @elt1_v8f64(double %
;
; X64AVX2-LABEL: elt1_v8f64:
; X64AVX2: # %bb.0:
-; X64AVX2-NEXT: vmovapd {{.*#+}} ymm1 = <42,u,2,3>
-; X64AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <42,u,2,3>
+; X64AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
; X64AVX2-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/insertelement-ones.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertelement-ones.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertelement-ones.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertelement-ones.ll Mon Jan 15 14:18:45 2018
@@ -77,7 +77,7 @@ define <4 x i64> @insert_v4i64_01x3(<4 x
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v4i64_01x3:
Modified: llvm/trunk/test/CodeGen/X86/insertelement-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertelement-zero.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insertelement-zero.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insertelement-zero.ll Mon Jan 15 14:18:45 2018
@@ -28,14 +28,14 @@ define <2 x double> @insert_v2f64_z1(<2
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41: # %bb.0:
-; SSE41-NEXT: xorpd %xmm1, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX: # %bb.0:
-; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = insertelement <2 x double> %a, double 0.0, i32 0
ret <2 x double> %1
@@ -66,14 +66,14 @@ define <4 x double> @insert_v4f64_0zz3(<
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41: # %bb.0:
; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
-; SSE41-NEXT: xorpd %xmm2, %xmm2
-; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE41-NEXT: xorps %xmm2, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX: # %bb.0:
-; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX-NEXT: retq
%1 = insertelement <4 x double> %a, double 0.0, i32 1
%2 = insertelement <4 x double> %1, double 0.0, i32 2
@@ -101,21 +101,15 @@ define <2 x i64> @insert_v2i64_z1(<2 x i
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: insert_v2i64_z1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_v2i64_z1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: insert_v2i64_z1:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%1 = insertelement <2 x i64> %a, i64 0, i32 0
ret <2 x i64> %1
}
@@ -141,21 +135,15 @@ define <4 x i64> @insert_v4i64_01z3(<4 x
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: xorps %xmm2, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: insert_v4i64_01z3:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_v4i64_01z3:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
-; AVX2-NEXT: retq
+; AVX-LABEL: insert_v4i64_01z3:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; AVX-NEXT: retq
%1 = insertelement <4 x i64> %a, i64 0, i32 2
ret <4 x i64> %1
}
@@ -263,21 +251,15 @@ define <4 x i32> @insert_v4i32_01z3(<4 x
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: insert_v4i32_01z3:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_v4i32_01z3:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: insert_v4i32_01z3:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX-NEXT: retq
%1 = insertelement <4 x i32> %a, i32 0, i32 2
ret <4 x i32> %1
}
@@ -312,9 +294,9 @@ define <8 x i32> @insert_v8i32_z12345z7(
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: xorps %xmm2, %xmm2
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v8i32_z12345z7:
Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Mon Jan 15 14:18:45 2018
@@ -835,7 +835,7 @@ define <4 x i32> @mload_constmask_v4i32(
; AVX1: ## %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX1-NEXT: vmaskmovps (%rdi), %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mload_constmask_v4i32:
@@ -963,15 +963,10 @@ define <8 x i32> @mload_constmask_v8i32(
}
define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) {
-; AVX1-LABEL: mload_constmask_v4i64:
-; AVX1: ## %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = mem[0],ymm0[1,2],mem[3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: mload_constmask_v4i64:
-; AVX2: ## %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1],ymm0[2,3,4,5],mem[6,7]
-; AVX2-NEXT: retq
+; AVX-LABEL: mload_constmask_v4i64:
+; AVX: ## %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1],ymm0[2,3,4,5],mem[6,7]
+; AVX-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v4i64:
; AVX512F: ## %bb.0:
@@ -997,8 +992,8 @@ define <4 x i64> @mload_constmask_v4i64(
define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) {
; AVX-LABEL: mload_constmask_v8f64:
; AVX: ## %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0,1,2],mem[3]
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3]
+; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7]
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6,7]
; AVX-NEXT: retq
;
; AVX512F-LABEL: mload_constmask_v8f64:
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-256.ll Mon Jan 15 14:18:45 2018
@@ -129,15 +129,15 @@ define <4 x double> @merge_4f64_f64_45zz
define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_34z6:
; AVX: # %bb.0:
-; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7]
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_f64_34z6:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
+; X32-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7]
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 3
%ptr1 = getelementptr inbounds double, double* %ptr, i64 4
@@ -262,8 +262,8 @@ define <8 x float> @merge_8f32_2f32_23z5
; X32-AVX-LABEL: merge_8f32_2f32_23z5:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3]
+; X32-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7]
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 2
%ptr1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3
Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Mon Jan 15 14:18:45 2018
@@ -105,10 +105,10 @@ define void @v3i32(<2 x i32> %a, <2 x i3
;
; AVX1-LABEL: v3i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; AVX1-NEXT: vmovq %xmm1, (%rdi)
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX1-NEXT: vextractps $2, %xmm0, 8(%rdi)
+; AVX1-NEXT: vmovlps %xmm1, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: v3i32:
@@ -121,10 +121,10 @@ define void @v3i32(<2 x i32> %a, <2 x i3
;
; XOP-LABEL: v3i32:
; XOP: # %bb.0:
-; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; XOP-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; XOP-NEXT: vmovq %xmm1, (%rdi)
+; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; XOP-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; XOP-NEXT: vextractps $2, %xmm0, 8(%rdi)
+; XOP-NEXT: vmovlps %xmm1, (%rdi)
; XOP-NEXT: retq
%r = shufflevector <2 x i32> %a, <2 x i32> %b, <3 x i32> <i32 0, i32 2, i32 1>
store <3 x i32> %r, <3 x i32>* %p
@@ -665,38 +665,38 @@ define void @v12i32(<8 x i32> %a, <8 x i
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,1]
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
-; AVX1-NEXT: vmovapd %xmm0, 32(%rdi)
+; AVX1-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX1-NEXT: vmovaps %ymm2, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: v12i32:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm3 = ymm0[3,3,2,3,7,7,6,7]
-; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,3]
-; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
-; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm3 = <0,4,u,1,5,u,2,6>
-; AVX2-SLOW-NEXT: vpermps %ymm0, %ymm3, %ymm0
-; AVX2-SLOW-NEXT: vbroadcastsd %xmm1, %ymm1
-; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
-; AVX2-SLOW-NEXT: vmovaps %ymm0, (%rdi)
-; AVX2-SLOW-NEXT: vmovaps %xmm2, 32(%rdi)
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
+; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[3,3,2,3,7,7,6,7]
+; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
+; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
+; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <0,4,u,1,5,u,2,6>
+; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm3, %ymm0
+; AVX2-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
+; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdi)
+; AVX2-SLOW-NEXT: vmovdqa %xmm2, 32(%rdi)
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: v12i32:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <0,4,u,1,5,u,2,6>
-; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm2
-; AVX2-FAST-NEXT: vbroadcastsd %xmm1, %ymm3
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
-; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [3,3,7,7,7,7,6,7]
-; AVX2-FAST-NEXT: vpermps %ymm0, %ymm3, %ymm0
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
-; AVX2-FAST-NEXT: vmovaps %xmm0, 32(%rdi)
-; AVX2-FAST-NEXT: vmovaps %ymm2, (%rdi)
+; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,4,u,1,5,u,2,6>
+; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm3
+; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
+; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [3,3,7,7,7,7,6,7]
+; AVX2-FAST-NEXT: vpermd %ymm0, %ymm3, %ymm0
+; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
+; AVX2-FAST-NEXT: vmovdqa %xmm0, 32(%rdi)
+; AVX2-FAST-NEXT: vmovdqa %ymm2, (%rdi)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
@@ -711,7 +711,7 @@ define void @v12i32(<8 x i32> %a, <8 x i
; XOP-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
; XOP-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,1]
; XOP-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
-; XOP-NEXT: vmovapd %xmm0, 32(%rdi)
+; XOP-NEXT: vmovaps %xmm0, 32(%rdi)
; XOP-NEXT: vmovaps %ymm2, (%rdi)
; XOP-NEXT: vzeroupper
; XOP-NEXT: retq
@@ -1381,7 +1381,7 @@ define void @interleave_24i32_out(<24 x
; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1],xmm4[2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
; SSE42-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3],xmm2[2,3]
-; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5],xmm8[6,7]
+; SSE42-NEXT: blendps {{.*#+}} xmm4 = xmm4[0,1,2],xmm8[3]
; SSE42-NEXT: movdqa %xmm10, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm5[2,3],xmm1[4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1]
@@ -1401,7 +1401,7 @@ define void @interleave_24i32_out(<24 x
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm9[0,1,0,3]
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE42-NEXT: movdqu %xmm3, 16(%rsi)
-; SSE42-NEXT: movdqu %xmm4, (%rsi)
+; SSE42-NEXT: movups %xmm4, (%rsi)
; SSE42-NEXT: movdqu %xmm5, 16(%rdx)
; SSE42-NEXT: movdqu %xmm7, (%rdx)
; SSE42-NEXT: movdqu %xmm2, 16(%rcx)
@@ -1422,7 +1422,7 @@ define void @interleave_24i32_out(<24 x
; AVX1-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,3,2,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[0,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
-; AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1,2],ymm4[3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7]
; AVX1-NEXT: vblendps {{.*#+}} xmm5 = xmm2[0,1],xmm3[2],xmm2[3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,0,3,2]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
@@ -1442,7 +1442,7 @@ define void @interleave_24i32_out(<24 x
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5,6,7]
-; AVX1-NEXT: vmovupd %ymm4, (%rsi)
+; AVX1-NEXT: vmovups %ymm4, (%rsi)
; AVX1-NEXT: vmovups %ymm5, (%rdx)
; AVX1-NEXT: vmovups %ymm0, (%rcx)
; AVX1-NEXT: vzeroupper
@@ -1520,7 +1520,7 @@ define void @interleave_24i32_out(<24 x
; XOP-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,3,2,1]
; XOP-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[0,3,2,3]
; XOP-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
-; XOP-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1,2],ymm4[3]
+; XOP-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7]
; XOP-NEXT: vblendps {{.*#+}} xmm5 = xmm2[0,1],xmm3[2],xmm2[3]
; XOP-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,0,3,2]
; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
@@ -1540,7 +1540,7 @@ define void @interleave_24i32_out(<24 x
; XOP-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5,6,7]
-; XOP-NEXT: vmovupd %ymm4, (%rsi)
+; XOP-NEXT: vmovups %ymm4, (%rsi)
; XOP-NEXT: vmovups %ymm5, (%rdx)
; XOP-NEXT: vmovups %ymm0, (%rcx)
; XOP-NEXT: vzeroupper
@@ -1674,8 +1674,8 @@ define void @interleave_24i32_in(<24 x i
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3],ymm0[4],ymm2[5,6],ymm0[7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
-; AVX1-NEXT: vmovupd %ymm0, 32(%rdi)
-; AVX1-NEXT: vmovupd %ymm4, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm4, 64(%rdi)
; AVX1-NEXT: vmovups %ymm3, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1763,7 +1763,7 @@ define void @interleave_24i32_in(<24 x i
; XOP-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; XOP-NEXT: vmovups %ymm0, 32(%rdi)
-; XOP-NEXT: vmovupd %ymm4, 64(%rdi)
+; XOP-NEXT: vmovups %ymm4, 64(%rdi)
; XOP-NEXT: vmovups %ymm3, (%rdi)
; XOP-NEXT: vzeroupper
; XOP-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/pr31956.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr31956.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr31956.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr31956.ll Mon Jan 15 14:18:45 2018
@@ -10,7 +10,7 @@ define <4 x float> @foo() {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7]
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Modified: llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll Mon Jan 15 14:18:45 2018
@@ -7,8 +7,8 @@ define <4 x i64> @autogen_SD88863() {
; X32: # %bb.0: # %BB
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; X32-NEXT: movb $1, %al
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_1: # %CF
@@ -22,8 +22,8 @@ define <4 x i64> @autogen_SD88863() {
; X64: # %bb.0: # %BB
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; X64-NEXT: movb $1, %al
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %CF
Modified: llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll Mon Jan 15 14:18:45 2018
@@ -1152,8 +1152,8 @@ define <2 x double> @add_sd_mask(<2 x do
; SSE41-NEXT: testb $1, %dil
; SSE41-NEXT: jne .LBB63_1
; SSE41-NEXT: # %bb.2:
-; SSE41-NEXT: movapd %xmm2, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: movaps %xmm2, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
; SSE41-NEXT: .LBB63_1:
; SSE41-NEXT: addsd %xmm0, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll Mon Jan 15 14:18:45 2018
@@ -24,12 +24,12 @@ define <2 x i64> @test_mm_blend_epi16(<2
define <2 x double> @test_mm_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_blend_pd:
; X32: # %bb.0:
-; X32-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_blend_pd:
; X64: # %bb.0:
-; X64-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
ret <2 x double> %res
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll Mon Jan 15 14:18:45 2018
@@ -7,7 +7,7 @@
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK: ## %bb.0:
-; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 6) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Mon Jan 15 14:18:45 2018
@@ -564,14 +564,14 @@ define <4 x float> @shuf_X0YC(<4 x float
define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XYZ0:
; X32: ## %bb.0:
-; X32-NEXT: pxor %xmm1, %xmm1
-; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; X32-NEXT: xorps %xmm1, %xmm1
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XYZ0:
; X64: ## %bb.0:
-; X64-NEXT: pxor %xmm1, %xmm1
-; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
Modified: llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll Mon Jan 15 14:18:45 2018
@@ -144,19 +144,19 @@ define void @legal_vzmovl_2i64_4i64(<2 x
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovupd (%ecx), %xmm0
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; X32-NEXT: vmovapd %ymm0, (%eax)
+; X32-NEXT: vmovups (%ecx), %xmm0
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: legal_vzmovl_2i64_4i64:
; X64: # %bb.0:
-; X64-NEXT: vmovupd (%rdi), %xmm0
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; X64-NEXT: vmovapd %ymm0, (%rsi)
+; X64-NEXT: vmovups (%rdi), %xmm0
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; X64-NEXT: vmovaps %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ld = load <2 x i64>, <2 x i64>* %in, align 8
@@ -196,19 +196,19 @@ define void @legal_vzmovl_2f64_4f64(<2 x
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovupd (%ecx), %xmm0
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; X32-NEXT: vmovapd %ymm0, (%eax)
+; X32-NEXT: vmovups (%ecx), %xmm0
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: legal_vzmovl_2f64_4f64:
; X64: # %bb.0:
-; X64-NEXT: vmovupd (%rdi), %xmm0
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; X64-NEXT: vmovapd %ymm0, (%rsi)
+; X64-NEXT: vmovups (%rdi), %xmm0
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; X64-NEXT: vmovaps %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ld = load <2 x double>, <2 x double>* %in, align 8
Modified: llvm/trunk/test/CodeGen/X86/vector-blend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-blend.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-blend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-blend.ll Mon Jan 15 14:18:45 2018
@@ -76,18 +76,13 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1,
;
; SSE41-LABEL: vsel_4xi8:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: vsel_4xi8:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: vsel_4xi8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: vsel_4xi8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX-NEXT: retq
entry:
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
ret <4 x i8> %vsel
@@ -110,18 +105,13 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %
;
; SSE41-LABEL: vsel_4xi16:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: vsel_4xi16:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: vsel_4xi16:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: vsel_4xi16:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX-NEXT: retq
entry:
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
ret <4 x i16> %vsel
@@ -144,18 +134,13 @@ define <4 x i32> @vsel_i32(<4 x i32> %v1
;
; SSE41-LABEL: vsel_i32:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: vsel_i32:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: vsel_i32:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: vsel_i32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
entry:
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
ret <4 x i32> %vsel
@@ -176,12 +161,12 @@ define <2 x double> @vsel_double(<2 x do
;
; SSE41-LABEL: vsel_double:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
entry:
%vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
@@ -203,18 +188,13 @@ define <2 x i64> @vsel_i64(<2 x i64> %v1
;
; SSE41-LABEL: vsel_i64:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: vsel_i64:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: vsel_i64:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: vsel_i64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
entry:
%vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
ret <2 x i64> %vsel
@@ -342,8 +322,8 @@ define <8 x i32> @vsel_i328(<8 x i32> %v
;
; SSE41-LABEL: vsel_i328:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i328:
@@ -378,16 +358,16 @@ define <8 x double> @vsel_double8(<8 x d
;
; SSE41-LABEL: vsel_double8:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
-; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT: movaps %xmm5, %xmm1
; SSE41-NEXT: movaps %xmm7, %xmm3
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double8:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
-; AVX-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
+; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT: retq
entry:
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
@@ -417,23 +397,17 @@ define <8 x i64> @vsel_i648(<8 x i64> %v
;
; SSE41-LABEL: vsel_i648:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT: movaps %xmm5, %xmm1
; SSE41-NEXT: movaps %xmm7, %xmm3
; SSE41-NEXT: retq
;
-; AVX1-LABEL: vsel_i648:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: vsel_i648:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
-; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
-; AVX2-NEXT: retq
+; AVX-LABEL: vsel_i648:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
+; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
+; AVX-NEXT: retq
entry:
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
ret <8 x i64> %vsel
@@ -458,13 +432,13 @@ define <4 x double> @vsel_double4(<4 x d
;
; SSE41-LABEL: vsel_double4:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
-; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double4:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT: retq
entry:
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
@@ -568,13 +542,13 @@ define <4 x double> @constant_blendvpd_a
;
; SSE41-LABEL: constant_blendvpd_avx:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT: movaps %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: constant_blendvpd_avx:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT: retq
entry:
%select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
@@ -752,12 +726,12 @@ define <4 x double> @blend_shufflevector
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_4xdouble:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX-NEXT: retq
entry:
%select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
@@ -779,19 +753,14 @@ define <4 x i64> @blend_shufflevector_4x
;
; SSE41-LABEL: blend_shufflevector_4xi64:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: retq
;
-; AVX1-LABEL: blend_shufflevector_4xi64:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: blend_shufflevector_4xi64:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: retq
+; AVX-LABEL: blend_shufflevector_4xi64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
+; AVX-NEXT: retq
entry:
%select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
ret <4 x i64> %select
Modified: llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll Mon Jan 15 14:18:45 2018
@@ -72,16 +72,16 @@ define i64 @extract_any_extend_vector_in
; X32-AVX-NEXT: subl $384, %esp # imm = 0x180
; X32-AVX-NEXT: movl 40(%ebp), %ecx
; X32-AVX-NEXT: vbroadcastsd 32(%ebp), %ymm0
-; X32-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vmovapd %ymm0, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vmovapd %ymm1, (%esp)
-; X32-AVX-NEXT: vmovapd %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vmovaps %ymm1, (%esp)
+; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: leal (%ecx,%ecx), %eax
; X32-AVX-NEXT: andl $31, %eax
; X32-AVX-NEXT: movl 128(%esp,%eax,4), %eax
@@ -101,12 +101,12 @@ define i64 @extract_any_extend_vector_in
; X64-AVX-NEXT: subq $256, %rsp # imm = 0x100
; X64-AVX-NEXT: # kill: def %edi killed %edi def %rdi
; X64-AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm3[3,1,2,3]
-; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; X64-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX-NEXT: vmovapd %ymm1, (%rsp)
-; X64-AVX-NEXT: vmovapd %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; X64-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: vmovaps %ymm1, (%rsp)
+; X64-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-AVX-NEXT: andl $15, %edi
; X64-AVX-NEXT: movq (%rsp,%rdi,8), %rax
; X64-AVX-NEXT: movq %rbp, %rsp
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Mon Jan 15 14:18:45 2018
@@ -240,17 +240,17 @@ define <2 x double> @shuffle_v2f64_03(<2
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_03:
; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_03:
; AVX2: # %bb.0:
-; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_03:
@@ -278,17 +278,17 @@ define <2 x double> @shuffle_v2f64_21(<2
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_21:
; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_21:
; AVX2: # %bb.0:
-; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_21:
@@ -389,23 +389,13 @@ define <2 x i64> @shuffle_v2i64_03(<2 x
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_03:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_03:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_03:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_03:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
@@ -430,24 +420,14 @@ define <2 x i64> @shuffle_v2i64_03_copy(
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
+; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_03_copy:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_03_copy:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_03_copy:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_03_copy:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
@@ -586,23 +566,13 @@ define <2 x i64> @shuffle_v2i64_21(<2 x
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_21:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_21:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_21:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_21:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
@@ -627,24 +597,14 @@ define <2 x i64> @shuffle_v2i64_21_copy(
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
+; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_21_copy:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_21_copy:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_21_copy:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_21_copy:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
@@ -802,14 +762,14 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_z1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_z1:
@@ -919,26 +879,26 @@ define <2 x double> @shuffle_v2f64_z1(<2
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41: # %bb.0:
-; SSE41-NEXT: xorpd %xmm1, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z1:
; AVX2: # %bb.0:
-; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
ret <2 x double> %shuffle
@@ -993,14 +953,14 @@ define <2 x i64> @shuffle_v2i64_bitcast_
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
@@ -1235,17 +1195,17 @@ define <2 x double> @insert_reg_lo_v2f64
;
; SSE41-LABEL: insert_reg_lo_v2f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_reg_lo_v2f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_lo_v2f64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_lo_v2f64:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Mon Jan 15 14:18:45 2018
@@ -356,8 +356,8 @@ define <4 x i32> @shuffle_v4i32_0124(<4
;
; AVX1-LABEL: shuffle_v4i32_0124:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v4i32_0124:
@@ -396,9 +396,9 @@ define <4 x i32> @shuffle_v4i32_0142(<4
;
; AVX1-LABEL: shuffle_v4i32_0142:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v4i32_0142:
@@ -441,9 +441,9 @@ define <4 x i32> @shuffle_v4i32_0412(<4
;
; AVX1-LABEL: shuffle_v4i32_0412:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v4i32_0412:
@@ -483,17 +483,11 @@ define <4 x i32> @shuffle_v4i32_4012(<4
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v4i32_4012:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v4i32_4012:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,2]
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: shuffle_v4i32_4012:
+; AVX: # %bb.0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,2]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
}
@@ -538,9 +532,9 @@ define <4 x i32> @shuffle_v4i32_0451(<4
;
; AVX1-LABEL: shuffle_v4i32_0451:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v4i32_0451:
@@ -594,9 +588,9 @@ define <4 x i32> @shuffle_v4i32_4015(<4
;
; AVX1-LABEL: shuffle_v4i32_4015:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v4i32_4015:
@@ -1191,14 +1185,14 @@ define <4 x i32> @shuffle_v4i32_4zzz(<4
;
; SSE41-LABEL: shuffle_v4i32_4zzz:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v4i32_4zzz:
; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i32_4zzz:
@@ -1241,16 +1235,16 @@ define <4 x i32> @shuffle_v4i32_z4zz(<4
;
; AVX1-LABEL: shuffle_v4i32_z4zz:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v4i32_z4zz:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v4i32_z4zz:
@@ -1297,16 +1291,16 @@ define <4 x i32> @shuffle_v4i32_zz4z(<4
;
; AVX1-LABEL: shuffle_v4i32_zz4z:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v4i32_zz4z:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v4i32_zz4z:
@@ -1367,9 +1361,9 @@ define <4 x i32> @shuffle_v4i32_z6zz(<4
;
; AVX1-LABEL: shuffle_v4i32_z6zz:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v4i32_z6zz:
@@ -1580,19 +1574,12 @@ define <4 x i32> @shuffle_v4i32_2456(<4
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v4i32_2456:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,2]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v4i32_2456:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,2]
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: shuffle_v4i32_2456:
+; AVX: # %bb.0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,2]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
%s1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %s2
@@ -1739,21 +1726,15 @@ define <4 x i32> @shuffle_v4i32_0z23(<4
;
; SSE41-LABEL: shuffle_v4i32_0z23:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v4i32_0z23:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v4i32_0z23:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: shuffle_v4i32_0z23:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
ret <4 x i32> %shuffle
}
@@ -1776,21 +1757,15 @@ define <4 x i32> @shuffle_v4i32_01z3(<4
;
; SSE41-LABEL: shuffle_v4i32_01z3:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v4i32_01z3:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v4i32_01z3:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: shuffle_v4i32_01z3:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
ret <4 x i32> %shuffle
}
@@ -1813,21 +1788,15 @@ define <4 x i32> @shuffle_v4i32_012z(<4
;
; SSE41-LABEL: shuffle_v4i32_012z:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v4i32_012z:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v4i32_012z:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: shuffle_v4i32_012z:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i32> %shuffle
}
@@ -1850,21 +1819,15 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4
;
; SSE41-LABEL: shuffle_v4i32_0zz3:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v4i32_0zz3:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v4i32_0zz3:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: shuffle_v4i32_0zz3:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
ret <4 x i32> %shuffle
}
@@ -2031,18 +1994,13 @@ define <4 x i32> @mask_v4i32_0127(<4 x i
;
; SSE41-LABEL: mask_v4i32_0127:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: mask_v4i32_0127:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: mask_v4i32_0127:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: mask_v4i32_0127:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
+; AVX-NEXT: retq
%1 = bitcast <4 x i32> %a to <2 x i64>
%2 = bitcast <4 x i32> %b to <2 x i64>
%3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
@@ -2234,21 +2192,15 @@ define <4 x i32> @insert_mem_lo_v4i32(<2
;
; SSE41-LABEL: insert_mem_lo_v4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: insert_mem_lo_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: insert_mem_lo_v4i32:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: insert_mem_lo_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%a = load <2 x i32>, <2 x i32>* %ptr
%v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -2312,12 +2264,12 @@ define <4 x float> @insert_reg_lo_v4f32(
;
; SSE41-LABEL: insert_reg_lo_v4f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: insert_reg_lo_v4f32:
; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_lo_v4f32:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Mon Jan 15 14:18:45 2018
@@ -2493,18 +2493,13 @@ define <8 x i16> @mask_v8i16_012345ef(<8
;
; SSE41-LABEL: mask_v8i16_012345ef:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: mask_v8i16_012345ef:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: mask_v8i16_012345ef:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
-; AVX2OR512VL-NEXT: retq
+; AVX-LABEL: mask_v8i16_012345ef:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
+; AVX-NEXT: retq
%1 = bitcast <8 x i16> %a to <2 x i64>
%2 = bitcast <8 x i16> %b to <2 x i64>
%3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Mon Jan 15 14:18:45 2018
@@ -887,15 +887,10 @@ define <16 x i16> @shuffle_v16i16_00_01_
}
define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
-; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; ALL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %shuffle
}
@@ -3113,7 +3108,7 @@ define <16 x i16> @shuffle_v16i16_01_zz_
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
@@ -4469,15 +4464,10 @@ define <16 x i16> @insert_v16i16_0elt_in
}
define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
-; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT: retq
%alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Mon Jan 15 14:18:45 2018
@@ -1284,7 +1284,7 @@ define <32 x i8> @shuffle_v32i8_01_zz_02
; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Mon Jan 15 14:18:45 2018
@@ -367,7 +367,7 @@ define <4 x double> @shuffle_v4f64_5163(
define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL: # %bb.0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %shuffle
@@ -376,7 +376,7 @@ define <4 x double> @shuffle_v4f64_0527(
define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL: # %bb.0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x double> %shuffle
@@ -403,7 +403,7 @@ define <4 x double> @shuffle_v4f64_4501(
define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL: # %bb.0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x double> %shuffle
@@ -461,7 +461,7 @@ define <4 x double> @shuffle_v4f64_0415(
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
@@ -588,8 +588,8 @@ define <4 x double> @shuffle_v4f64_1z2z(
;
; AVX2-SLOW-LABEL: shuffle_v4f64_1z2z:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX2-SLOW-NEXT: retq
;
@@ -601,8 +601,8 @@ define <4 x double> @shuffle_v4f64_1z2z(
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1z2z:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
-; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
+; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_1z2z:
@@ -825,7 +825,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
@@ -915,7 +915,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
@@ -1339,15 +1339,15 @@ define <4 x double> @insert_reg_and_zero
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
@@ -1512,20 +1512,10 @@ define <4 x double> @bitcast_v4f64_0426(
}
define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
-; AVX1-LABEL: concat_v4i64_0167:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: concat_v4i64_0167:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: concat_v4i64_0167:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: concat_v4i64_0167:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT: retq
%a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
%a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
%shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1777,12 +1767,12 @@ define <4 x i64> @add_v4i64_0246_1357(<4
; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm1[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm5 = xmm0[0],xmm4[0]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm3[2,3]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm5 = xmm0[0],xmm4[0]
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT: vpaddq %xmm1, %xmm2, %xmm1
@@ -1830,12 +1820,12 @@ define <4 x i64> @add_v4i64_4602_5713(<4
; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm5 = xmm1[0],xmm4[0]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm3[2,3]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm5 = xmm1[0],xmm4[0]
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT: vpaddq %xmm1, %xmm2, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Mon Jan 15 14:18:45 2018
@@ -110,7 +110,7 @@ define <8 x float> @shuffle_v8f32_060000
; AVX1-LABEL: shuffle_v8f32_06000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -127,7 +127,7 @@ define <8 x float> @shuffle_v8f32_700000
; AVX1-LABEL: shuffle_v8f32_70000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -663,7 +663,7 @@ define <8 x float> @shuffle_v8f32_c348cd
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX1-NEXT: retq
;
@@ -830,7 +830,7 @@ define <8 x float> @shuffle_v8f32_3210ba
define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_3210fedc:
; ALL: # %bb.0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
@@ -865,7 +865,7 @@ define <8 x float> @PR21138(<8 x float>
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR21138:
@@ -892,7 +892,7 @@ define <8 x float> @PR21138(<8 x float>
define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_ba987654:
; ALL: # %bb.0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
@@ -1106,7 +1106,7 @@ define <8 x i32> @shuffle_v8i32_06000000
; AVX1-LABEL: shuffle_v8i32_06000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -1123,7 +1123,7 @@ define <8 x i32> @shuffle_v8i32_70000000
; AVX1-LABEL: shuffle_v8i32_70000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -1913,17 +1913,11 @@ define <8 x i32> @shuffle_v8i32_3210ba98
}
define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_3210fedc:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_3210fedc:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_3210fedc:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
ret <8 x i32> %shuffle
}
@@ -1961,33 +1955,21 @@ define <8 x i32> @shuffle_v8i32_fedc7654
}
define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_ba987654:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_ba987654:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_ba987654:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_ba983210:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_ba983210:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_ba983210:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
}
@@ -2313,15 +2295,10 @@ define <8 x i32> @insert_mem_and_zero_v8
}
define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: concat_v8i32_0123CDEF:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: concat_v8i32_0123CDEF:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: concat_v8i32_0123CDEF:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT: retq
%alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -2428,11 +2405,11 @@ define <8 x float> @add_v8f32_02468ACE_1
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm3[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vaddps %ymm0, %ymm3, %ymm0
; AVX1-NEXT: retq
;
@@ -2477,11 +2454,11 @@ define <8 x float> @add_v8f32_8ACE0246_9
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm3[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vaddps %ymm0, %ymm3, %ymm0
; AVX1-NEXT: retq
;
@@ -2526,11 +2503,11 @@ define <8 x i32> @add_v8i32_02468ACE_135
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm3[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
@@ -2579,11 +2556,11 @@ define <8 x i32> @add_v8i32_8ACE0246_9BD
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm3[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Mon Jan 15 14:18:45 2018
@@ -274,7 +274,7 @@ define <16 x i32> @shuffle_v16i32_load_0
define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL: # %bb.0:
-; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i32> %c
@@ -296,10 +296,10 @@ define <8 x i32> @test_v16i32_1_3_5_7_9_
define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
; ALL-LABEL: test_v16i32_0_1_2_12:
; ALL: # %bb.0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
-; ALL-NEXT: vbroadcastss %xmm1, %xmm1
-; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %xmm1
+; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 12>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Mon Jan 15 14:18:45 2018
@@ -2684,7 +2684,7 @@ define <4 x double> @test_v8f64_2346 (<8
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
-; AVX512F-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: test_v8f64_2346:
@@ -2692,7 +2692,7 @@ define <4 x double> @test_v8f64_2346 (<8
; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512F-32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
-; AVX512F-32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-32-NEXT: retl
%res = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
ret <4 x double> %res
@@ -2744,7 +2744,7 @@ define <2 x i64> @test_v8i64_2_5 (<8 x i
; AVX512F-LABEL: test_v8i64_2_5:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3]
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512F-NEXT: vzeroupper
@@ -2753,7 +2753,7 @@ define <2 x i64> @test_v8i64_2_5 (<8 x i
; AVX512F-32-LABEL: test_v8i64_2_5:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX512F-32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512F-32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3]
; AVX512F-32-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512F-32-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll Mon Jan 15 14:18:45 2018
@@ -91,8 +91,8 @@ define <4 x double> @expand2(<2 x double
; KNL64: # %bb.0:
; KNL64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; KNL64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL64-NEXT: retq
;
; SKX32-LABEL: expand2:
@@ -107,8 +107,8 @@ define <4 x double> @expand2(<2 x double
; KNL32: # %bb.0:
; KNL32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; KNL32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL32-NEXT: retl
%res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 2, i32 1>
ret <4 x double> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll Mon Jan 15 14:18:45 2018
@@ -203,14 +203,14 @@ define <8 x float> @combine_vpermilvar_v
define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) {
; X32-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X32: # %bb.0:
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X64: # %bb.0:
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT: retq
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
%2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Mon Jan 15 14:18:45 2018
@@ -522,8 +522,8 @@ define <4 x i64> @combine_pshufb_as_zext
define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
; X32-AVX2-LABEL: combine_pshufb_as_vzmovl_64:
; X32-AVX2: # %bb.0:
-; X32-AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; X32-AVX2-NEXT: retl
;
; X32-AVX512-LABEL: combine_pshufb_as_vzmovl_64:
@@ -534,8 +534,8 @@ define <4 x double> @combine_pshufb_as_v
;
; X64-AVX2-LABEL: combine_pshufb_as_vzmovl_64:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X64-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: combine_pshufb_as_vzmovl_64:
@@ -978,20 +978,20 @@ define internal fastcc <8 x float> @PR34
; X32-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X32-AVX2-NEXT: vmovapd {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X32-AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X32-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X32-AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X32-AVX2-NEXT: retl
;
; X32-AVX512-LABEL: PR34577:
; X32-AVX512: # %bb.0: # %entry
-; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
; X32-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
-; X32-AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; X32-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
-; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X32-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
; X32-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X32-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X32-AVX512-NEXT: retl
;
; X64-AVX2-LABEL: PR34577:
@@ -999,20 +999,20 @@ define internal fastcc <8 x float> @PR34
; X64-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X64-AVX2-NEXT: vmovapd {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X64-AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X64-AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: PR34577:
; X64-AVX512: # %bb.0: # %entry
-; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
; X64-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
-; X64-AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; X64-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
-; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X64-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
; X64-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X64-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X64-AVX512-NEXT: retq
entry:
%shuf0 = shufflevector <8 x float> %inp0, <8 x float> %inp2, <8 x i32> <i32 1, i32 10, i32 11, i32 13, i32 2, i32 13, i32 5, i32 0>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Mon Jan 15 14:18:45 2018
@@ -49,17 +49,17 @@ define <2 x double> @combine_pshufb_as_m
;
; SSE41-LABEL: combine_pshufb_as_movsd:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_pshufb_as_movsd:
; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_pshufb_as_movsd:
; AVX2: # %bb.0:
-; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: combine_pshufb_as_movsd:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll Mon Jan 15 14:18:45 2018
@@ -298,21 +298,15 @@ define <4 x i32> @combine_bitwise_ops_te
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT: andps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test1b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test1b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test1b:
+; AVX: # %bb.0:
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%and = and <4 x i32> %shuf1, %shuf2
@@ -338,21 +332,15 @@ define <4 x i32> @combine_bitwise_ops_te
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT: orps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test2b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test2b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test2b:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%or = or <4 x i32> %shuf1, %shuf2
@@ -374,24 +362,17 @@ define <4 x i32> @combine_bitwise_ops_te
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test3b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test3b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test3b:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%xor = xor <4 x i32> %shuf1, %shuf2
@@ -417,21 +398,15 @@ define <4 x i32> @combine_bitwise_ops_te
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT: andps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test4b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test4b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test4b:
+; AVX: # %bb.0:
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%and = and <4 x i32> %shuf1, %shuf2
@@ -457,21 +432,15 @@ define <4 x i32> @combine_bitwise_ops_te
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT: orps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test5b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test5b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test5b:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%or = or <4 x i32> %shuf1, %shuf2
@@ -493,24 +462,17 @@ define <4 x i32> @combine_bitwise_ops_te
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test6b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test6b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test6b:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
%xor = xor <4 x i32> %shuf1, %shuf2
@@ -904,9 +866,9 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX1-LABEL: combine_nested_undef_test15:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test15:
@@ -941,17 +903,11 @@ define <4 x i32> @combine_nested_undef_t
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
ret <4 x i32> %2
@@ -976,17 +932,11 @@ define <4 x i32> @combine_nested_undef_t
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test17:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test17:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test17:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
ret <4 x i32> %2
@@ -1026,17 +976,11 @@ define <4 x i32> @combine_nested_undef_t
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test19:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test19:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test19:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
ret <4 x i32> %2
@@ -1063,17 +1007,11 @@ define <4 x i32> @combine_nested_undef_t
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test20:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test20:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,3,0]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test20:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,3,0]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
ret <4 x i32> %2
@@ -1100,8 +1038,8 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX1-LABEL: combine_nested_undef_test21:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test21:
@@ -1362,18 +1300,13 @@ define <4 x i32> @combine_test7(<4 x i32
;
; SSE41-LABEL: combine_test7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test7:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test7:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test7:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
ret <4 x i32> %2
@@ -1425,18 +1358,13 @@ define <4 x i32> @combine_test10(<4 x i3
;
; SSE41-LABEL: combine_test10:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test10:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test10:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test10:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i32> %2
@@ -1559,18 +1487,13 @@ define <4 x i32> @combine_test17(<4 x i3
;
; SSE41-LABEL: combine_test17:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test17:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test17:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test17:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
ret <4 x i32> %2
@@ -1621,18 +1544,13 @@ define <4 x i32> @combine_test20(<4 x i3
;
; SSE41-LABEL: combine_test20:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test20:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test20:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test20:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
ret <4 x i32> %2
@@ -1760,13 +1678,13 @@ define <4 x float> @combine_test3b(<4 x
;
; SSE41-LABEL: combine_test3b:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test3b:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
@@ -2020,12 +1938,12 @@ define <4 x float> @combine_blend_01(<4
;
; SSE41-LABEL: combine_blend_01:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_blend_01:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
%shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
@@ -2154,12 +2072,12 @@ define <4 x float> @combine_undef_input_
;
; SSE41-LABEL: combine_undef_input_test1:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test1:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
@@ -2226,12 +2144,12 @@ define <4 x float> @combine_undef_input_
;
; SSE41-LABEL: combine_undef_input_test5:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test5:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
@@ -2338,12 +2256,12 @@ define <4 x float> @combine_undef_input_
;
; SSE41-LABEL: combine_undef_input_test11:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test11:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
%2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
@@ -2410,12 +2328,12 @@ define <4 x float> @combine_undef_input_
;
; SSE41-LABEL: combine_undef_input_test15:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test15:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
%2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
@@ -2866,8 +2784,8 @@ define <8 x float> @PR22412(<8 x float>
;
; SSE41-LABEL: PR22412:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
-; SSE41-NEXT: movapd %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
+; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2]
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2]
; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -2876,22 +2794,22 @@ define <8 x float> @PR22412(<8 x float>
;
; AVX1-LABEL: PR22412:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0],ymm1[3,2],ymm0[5,4],ymm1[7,6]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: PR22412:
; AVX2-SLOW: # %bb.0: # %entry
-; AVX2-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: PR22412:
; AVX2-FAST: # %bb.0: # %entry
-; AVX2-FAST-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [1,0,7,6,5,4,3,2]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,7,6,5,4,3,2]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
entry:
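
For readers following the check updates above: a PBLENDW immediate works at word granularity while a BLENDPS immediate works at dword granularity, so a pattern such as xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] (imm 0xCC) narrows to xmm0[0],xmm2[1],xmm0[2],xmm2[3] (imm 0xA), and the narrowing is only possible when both word bits of each dword pair agree. A minimal C++ sketch of that remapping follows; the helper name and the standalone form are illustrative only and are not the code in X86InstrInfo.cpp.

  // Illustrative sketch only -- not the implementation from X86InstrInfo.cpp.
  // Narrow a PBLENDW word-granularity immediate to a BLENDPS dword-granularity
  // immediate. Each dword covers two adjacent words, so the conversion is only
  // legal when both word bits of a pair agree.
  #include <cassert>
  #include <cstdint>
  #include <optional>

  std::optional<uint8_t> pblendwToBlendps(uint8_t WordMask) {
    uint8_t DwordMask = 0;
    for (int I = 0; I != 4; ++I) {
      unsigned Lo = (WordMask >> (2 * I)) & 1;
      unsigned Hi = (WordMask >> (2 * I + 1)) & 1;
      if (Lo != Hi)
        return std::nullopt; // selection straddles a dword -- no BLENDPS form
      DwordMask |= Lo << I;
    }
    return DwordMask;
  }

  int main() {
    // 0xCC selects words 2,3,6,7 from the second source, i.e. the old
    // "xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]" checks; it narrows to 0xA,
    // the new "xmm0[0],xmm2[1],xmm0[2],xmm2[3]" blendps form.
    assert(pblendwToBlendps(0xCC) == std::optional<uint8_t>(0xA));
    // 0x12 mixes words within a dword and has no BLENDPS equivalent.
    assert(!pblendwToBlendps(0x12).has_value());
  }
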
Modified: llvm/trunk/test/CodeGen/X86/vselect-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-2.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-2.ll Mon Jan 15 14:18:45 2018
@@ -13,18 +13,13 @@ define <4 x i32> @test1(<4 x i32> %A, <4
;
; SSE41-LABEL: test1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test1:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%select = select <4 x i1><i1 true, i1 true, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B
ret <4 x i32> %select
}
@@ -37,18 +32,13 @@ define <4 x i32> @test2(<4 x i32> %A, <4
;
; SSE41-LABEL: test2:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test2:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test2:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test2:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%select = select <4 x i1><i1 false, i1 false, i1 true, i1 true>, <4 x i32> %A, <4 x i32> %B
ret <4 x i32> %select
}
@@ -62,12 +52,12 @@ define <4 x float> @test3(<4 x float> %A
;
; SSE41-LABEL: test3:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test3:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%select = select <4 x i1><i1 true, i1 true, i1 false, i1 false>, <4 x float> %A, <4 x float> %B
ret <4 x float> %select
@@ -81,12 +71,12 @@ define <4 x float> @test4(<4 x float> %A
;
; SSE41-LABEL: test4:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test4:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%select = select <4 x i1><i1 false, i1 false, i1 true, i1 true>, <4 x float> %A, <4 x float> %B
ret <4 x float> %select
Modified: llvm/trunk/test/CodeGen/X86/vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect.ll Mon Jan 15 14:18:45 2018
@@ -36,12 +36,12 @@ define <4 x float> @test2(<4 x float> %a
;
; SSE41-LABEL: test2:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test2:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -55,12 +55,12 @@ define <4 x float> @test3(<4 x float> %a
;
; SSE41-LABEL: test3:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test3:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -113,18 +113,13 @@ define <8 x i16> @test7(<8 x i16> %a, <8
;
; SSE41-LABEL: test7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test7:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test7:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test7:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %1
}
@@ -137,18 +132,13 @@ define <8 x i16> @test8(<8 x i16> %a, <8
;
; SSE41-LABEL: test8:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test8:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %1
}
@@ -310,18 +300,13 @@ define <4 x i32> @test19(<4 x i32> %a, <
;
; SSE41-LABEL: test19:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test19:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test19:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test19:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: retq
%1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %1
}
@@ -334,12 +319,12 @@ define <2 x double> @test20(<2 x double>
;
; SSE41-LABEL: test20:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test20:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@@ -353,18 +338,13 @@ define <2 x i64> @test21(<2 x i64> %a, <
;
; SSE41-LABEL: test21:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test21:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test21:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test21:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1
}
@@ -398,18 +378,13 @@ define <4 x i32> @test23(<4 x i32> %a, <
;
; SSE41-LABEL: test23:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test23:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test23:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test23:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %1
}
@@ -423,12 +398,12 @@ define <2 x double> @test24(<2 x double>
;
; SSE41-LABEL: test24:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test24:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@@ -443,18 +418,13 @@ define <2 x i64> @test25(<2 x i64> %a, <
;
; SSE41-LABEL: test25:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test25:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test25:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test25:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1
}
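
The BLENDPD to BLENDPS rewrites in the checks above go in the other direction: each per-double immediate bit widens to two per-float bits, so xmm0[0],xmm1[1] (imm 0b10) becomes xmm0[0,1],xmm1[2,3] (imm 0b1100). A hedged sketch of that widening, again with an illustrative helper name rather than the patch's actual code:

  // Illustrative sketch only. Widen a BLENDPD per-double immediate to the
  // equivalent BLENDPS per-float immediate: each double bit covers two floats.
  #include <cassert>
  #include <cstdint>

  uint8_t blendpdToBlendps(uint8_t QwordMask, int NumDoubles) {
    uint8_t DwordMask = 0;
    for (int I = 0; I != NumDoubles; ++I)
      if ((QwordMask >> I) & 1)
        DwordMask |= 0x3 << (2 * I); // one double -> two adjacent floats
    return DwordMask;
  }

  int main() {
    // "xmm0 = xmm0[0],xmm1[1]" (imm 0b10) becomes
    // "xmm0 = xmm0[0,1],xmm1[2,3]" (imm 0b1100) in the updated checks.
    assert(blendpdToBlendps(0b10, 2) == 0b1100);
  }
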
Modified: llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll?rev=322524&r1=322523&r2=322524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll Mon Jan 15 14:18:45 2018
@@ -691,7 +691,7 @@ define <32 x i1> @interleaved_load_vf32_
; AVX1-NEXT: vpshufb %xmm0, %xmm10, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm5[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm8 = ymm0[0,1],ymm8[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm8 = ymm0[0,1,2,3],ymm8[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = <u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm0, %xmm11, %xmm4
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm5
@@ -709,7 +709,7 @@ define <32 x i1> @interleaved_load_vf32_
; AVX1-NEXT: vpshufb %xmm5, %xmm10, %xmm5
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm9 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = <u,u,u,u,2,6,10,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm0, %xmm11, %xmm1
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
@@ -727,7 +727,7 @@ define <32 x i1> @interleaved_load_vf32_
; AVX1-NEXT: vpshufb %xmm4, %xmm10, %xmm4
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <u,u,u,u,3,7,11,15,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm1, %xmm11, %xmm4
; AVX1-NEXT: vpshufb %xmm1, %xmm3, %xmm3
@@ -745,7 +745,7 @@ define <32 x i1> @interleaved_load_vf32_
; AVX1-NEXT: vpshufb %xmm4, %xmm10, %xmm4
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; AVX1-NEXT: vpcmpeqb %xmm9, %xmm8, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm8, %xmm4