[llvm] a8ad917 - [X86] Fix handling of maskmovdqu in X32
Harald van Dijk via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 15 14:56:36 PDT 2021
Author: Harald van Dijk
Date: 2021-07-15T22:56:08+01:00
New Revision: a8ad9170543906fc58336ab736a109fb42082fbf
URL: https://github.com/llvm/llvm-project/commit/a8ad9170543906fc58336ab736a109fb42082fbf
DIFF: https://github.com/llvm/llvm-project/commit/a8ad9170543906fc58336ab736a109fb42082fbf.diff
LOG: [X86] Fix handling of maskmovdqu in X32
The maskmovdqu instruction is an odd one: it has a 32-bit and a 64-bit
variant, the former using EDI and the latter RDI, but the use of the
register is implicit. In 64-bit mode, a 0x67 prefix can be used to get
the version using EDI, but there is no way to express this in assembly
as a single instruction; the only way is with an explicit addr32
prefix.
This change adds support for the EDI form of the instruction in 64-bit
mode. When generating assembly text, that explicit addr32 prefix is
added. When not generating assembly text, it is kept as a single
instruction and emitted with the 0x67 prefix. When parsing assembly
text, it is re-parsed as ADDR32 followed by MASKMOVDQU64, which still
results in the correct bytes when converted to machine code.
The same applies to vmaskmovdqu as well.
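As a rough illustration (not part of the patch), here is a minimal C
sketch using the SSE2 intrinsic that lowers to this instruction; the
function name is made up for illustration, and the expected assembly
in the comments matches the encodings checked in the tests below:

#include <emmintrin.h>

// Hypothetical example: on x32 (ILP32 on x86-64) the pointer argument
// lives in EDI, so the backend must emit the 0x67 address-size prefix.
void store_masked(__m128i data, __m128i mask, char *dst) {
  // x86-64: maskmovdqu %xmm1, %xmm0          (implicit RDI)
  // x32:    addr32 maskmovdqu %xmm1, %xmm0   (implicit EDI,
  //         encoding: 67 66 0f f7 c1)
  _mm_maskmoveu_si128(data, mask, dst);
}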
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D103427
Added:
llvm/test/MC/X86/maskmovdqu.s
llvm/test/MC/X86/maskmovdqu64.s
Modified:
llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
llvm/lib/Target/X86/X86InstrSSE.td
llvm/lib/Target/X86/X86ScheduleBtVer2.td
llvm/test/CodeGen/X86/maskmovdqu.ll
llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/utils/TableGen/X86DisassemblerTables.cpp
llvm/utils/TableGen/X86RecognizableInstr.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index db8206cf2e3d9..757a3c0c8a712 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -116,6 +116,8 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_64BIT_VEX_OPSIZE, 4, "requires 64-bit mode and VEX") \
+ ENUM_ENTRY(IC_64BIT_VEX_OPSIZE_ADSIZE, 5, "requires 64-bit mode, VEX, and AdSize")\
ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 4e6d8e8e1a54e..82581eb3c30a2 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1119,6 +1119,8 @@ static int getInstructionID(struct InternalInstruction *insn,
switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
+ if (insn->hasAdSize)
+ attrMask |= ATTR_ADSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -1175,6 +1177,8 @@ static int getInstructionID(struct InternalInstruction *insn,
case 0x66:
if (insn->mode != MODE_16BIT)
attrMask |= ATTR_OPSIZE;
+ if (insn->hasAdSize)
+ attrMask |= ATTR_ADSIZE;
break;
case 0x67:
attrMask |= ATTR_ADSIZE;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index ae40b712edf5a..41fda603d5a9f 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4011,7 +4011,15 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
- VEX, VEX_WIG;
+ VEX, VEX_WIG, AdSize64;
+let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
+def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask), "",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+ VEX, VEX_WIG, AdSize32 {
+ let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
+ let AsmVariantName = "NonParsable";
+}
let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -4020,7 +4028,15 @@ def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
+ AdSize64;
+let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
+def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+ AdSize32 {
+ let AsmVariantName = "NonParsable";
+}
} // ExeDomain = SSEPackedInt
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index ef156b573154f..cdd03830bcad3 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -835,8 +835,8 @@ def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JAL
let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
let NumMicroOps = 63;
}
-def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
- VMASKMOVDQU, VMASKMOVDQU64)>;
+def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, MASKMOVDQUX32,
+ VMASKMOVDQU, VMASKMOVDQU64, VMASKMOVDQUX32)>;
///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
diff --git a/llvm/test/CodeGen/X86/maskmovdqu.ll b/llvm/test/CodeGen/X86/maskmovdqu.ll
index a8443e44a16d6..898b0e9c54a25 100644
--- a/llvm/test/CodeGen/X86/maskmovdqu.ll
+++ b/llvm/test/CodeGen/X86/maskmovdqu.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=i686_SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_64_SSE2
+; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_x32_SSE2
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=i686_AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=x86_64_AVX
+; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+avx | FileCheck %s --check-prefix=x86_x32_AVX
; rdar://6573467
define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
@@ -20,6 +22,13 @@ define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
; x86_64_SSE2-NEXT: maskmovdqu %xmm1, %xmm0
; x86_64_SSE2-NEXT: retq
;
+; x86_x32_SSE2-LABEL: test:
+; x86_x32_SSE2: # %bb.0: # %entry
+; x86_x32_SSE2-NEXT: movq %rsi, %rdi
+; x86_x32_SSE2-NEXT: # kill: def $edi killed $edi killed $rdi
+; x86_x32_SSE2-NEXT: addr32 maskmovdqu %xmm1, %xmm0
+; x86_x32_SSE2-NEXT: retq
+;
; i686_AVX-LABEL: test:
; i686_AVX: # %bb.0: # %entry
; i686_AVX-NEXT: pushl %edi
@@ -33,6 +42,12 @@ define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
; x86_64_AVX-NEXT: movq %rsi, %rdi
; x86_64_AVX-NEXT: vmaskmovdqu %xmm1, %xmm0
; x86_64_AVX-NEXT: retq
+; x86_x32_AVX-LABEL: test:
+; x86_x32_AVX: # %bb.0: # %entry
+; x86_x32_AVX-NEXT: movq %rsi, %rdi
+; x86_x32_AVX-NEXT: # kill: def $edi killed $edi killed $rdi
+; x86_x32_AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0
+; x86_x32_AVX-NEXT: retq
entry:
tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c )
ret void
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index e3051f669e18a..eaed72299bce2 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -5,6 +5,9 @@
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE,X32-SSE
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX1,X32-AVX1
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX512,X32-AVX512
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
@@ -473,6 +476,11 @@ define void @test_mm_clflush(i8* %a0) nounwind {
; X64: # %bb.0:
; X64-NEXT: clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
; X64-NEXT: retq # encoding: [0xc3]
+;
+; X32-LABEL: test_mm_clflush:
+; X32: # %bb.0:
+; X32-NEXT: clflush (%edi) # encoding: [0x67,0x0f,0xae,0x3f]
+; X32-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.sse2.clflush(i8* %a0)
ret void
}
@@ -1497,6 +1505,10 @@ define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X64-LABEL: test_mm_cvtsd_f64:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
+;
+; X32-LABEL: test_mm_cvtsd_f64:
+; X32: # %bb.0:
+; X32-NEXT: retq # encoding: [0xc3]
%res = extractelement <2 x double> %a0, i32 0
ret double %res
}
@@ -1574,6 +1586,21 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_cvtsd_ss_load:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: cvtsd2ss (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x5a,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_cvtsd_ss_load:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vcvtsd2ss (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xfb,0x5a,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_cvtsd_ss_load:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vcvtsd2ss (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x5a,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
ret <4 x float> %res
@@ -1629,6 +1656,21 @@ define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_cvtsi32_sd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: cvtsi2sd %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_cvtsi32_sd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_cvtsi32_sd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%cvt = sitofp i32 %a1 to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
@@ -1667,6 +1709,21 @@ define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_cvtsi32_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_cvtsi32_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_cvtsi32_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 0, i32 1
%res2 = insertelement <4 x i32> %res1, i32 0, i32 2
@@ -1856,6 +1913,21 @@ define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_insert_epi16:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_insert_epi16:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_insert_epi16:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
%bc = bitcast <8 x i16> %res to <2 x i64>
@@ -1905,6 +1977,21 @@ define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_load_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_load_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_load_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%res = load <2 x double>, <2 x double>* %arg0, align 16
ret <2 x double> %res
@@ -1949,6 +2036,24 @@ define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_load_sd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07]
+; X32-SSE-NEXT: # xmm0 = mem[0],zero
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_load_sd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07]
+; X32-AVX1-NEXT: # xmm0 = mem[0],zero
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_load_sd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07]
+; X32-AVX512-NEXT: # xmm0 = mem[0],zero
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a0, align 1
%res0 = insertelement <2 x double> undef, double %ld, i32 0
%res1 = insertelement <2 x double> %res0, double 0.0, i32 1
@@ -1988,6 +2093,21 @@ define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_load_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_load_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_load_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res = load <2 x i64>, <2 x i64>* %a0, align 16
ret <2 x i64> %res
}
@@ -2035,6 +2155,26 @@ define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_load1_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07]
+; X32-SSE-NEXT: # xmm0 = mem[0],zero
+; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_load1_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovddup (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x12,0x07]
+; X32-AVX1-NEXT: # xmm0 = mem[0,0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_load1_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovddup (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x12,0x07]
+; X32-AVX512-NEXT: # xmm0 = mem[0,0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a0, align 8
%res0 = insertelement <2 x double> undef, double %ld, i32 0
%res1 = insertelement <2 x double> %res0, double %ld, i32 1
@@ -2080,6 +2220,24 @@ define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadh_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movhps (%edi), %xmm0 # encoding: [0x67,0x0f,0x16,0x07]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadh_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovhps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x16,0x07]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadh_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovhps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x16,0x07]
+; X32-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
%res = insertelement <2 x double> %a0, double %ld, i32 1
ret <2 x double> %res
@@ -2124,6 +2282,24 @@ define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadl_epi64:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07]
+; X32-SSE-NEXT: # xmm0 = mem[0],zero
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadl_epi64:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07]
+; X32-AVX1-NEXT: # xmm0 = mem[0],zero
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadl_epi64:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07]
+; X32-AVX512-NEXT: # xmm0 = mem[0],zero
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%bc = bitcast <2 x i64>* %a1 to i64*
%ld = load i64, i64* %bc, align 1
%res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
@@ -2170,6 +2346,24 @@ define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadl_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movlps (%edi), %xmm0 # encoding: [0x67,0x0f,0x12,0x07]
+; X32-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadl_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovlps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x12,0x07]
+; X32-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadl_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovlps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x12,0x07]
+; X32-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ld = load double, double* %a1, align 8
%res = insertelement <2 x double> %a0, double %ld, i32 0
ret <2 x double> %res
@@ -2216,6 +2410,25 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX512-NEXT: # xmm0 = mem[1,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadr_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07]
+; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
+; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadr_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vpermilpd $1, (%edi), %xmm0 # encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01]
+; X32-AVX1-NEXT: # xmm0 = mem[1,0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadr_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpermilpd $1, (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01]
+; X32-AVX512-NEXT: # xmm0 = mem[1,0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%ld = load <2 x double>, <2 x double>* %arg0, align 16
%res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
@@ -2255,6 +2468,21 @@ define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadu_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadu_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadu_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%res = load <2 x double>, <2 x double>* %arg0, align 1
ret <2 x double> %res
@@ -2293,6 +2521,21 @@ define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadu_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadu_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadu_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res = load <2 x i64>, <2 x i64>* %a0, align 1
ret <2 x i64> %res
}
@@ -2336,6 +2579,24 @@ define <2 x i64> @test_mm_loadu_si64(i8* nocapture readonly %A) {
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadu_si64:
+; X32-SSE: # %bb.0: # %entry
+; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07]
+; X32-SSE-NEXT: # xmm0 = mem[0],zero
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadu_si64:
+; X32-AVX1: # %bb.0: # %entry
+; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07]
+; X32-AVX1-NEXT: # xmm0 = mem[0],zero
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadu_si64:
+; X32-AVX512: # %bb.0: # %entry
+; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07]
+; X32-AVX512-NEXT: # xmm0 = mem[0],zero
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%__v.i = bitcast i8* %A to i64*
%0 = load i64, i64* %__v.i, align 1
@@ -2382,6 +2643,24 @@ define <2 x i64> @test_mm_loadu_si32(i8* nocapture readonly %A) {
; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadu_si32:
+; X32-SSE: # %bb.0: # %entry
+; X32-SSE-NEXT: movss (%edi), %xmm0 # encoding: [0x67,0xf3,0x0f,0x10,0x07]
+; X32-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadu_si32:
+; X32-AVX1: # %bb.0: # %entry
+; X32-AVX1-NEXT: vmovss (%edi), %xmm0 # encoding: [0x67,0xc5,0xfa,0x10,0x07]
+; X32-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadu_si32:
+; X32-AVX512: # %bb.0: # %entry
+; X32-AVX512-NEXT: vmovss (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfa,0x10,0x07]
+; X32-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%__v.i = bitcast i8* %A to i32*
%0 = load i32, i32* %__v.i, align 1
@@ -2429,6 +2708,24 @@ define <2 x i64> @test_mm_loadu_si16(i8* nocapture readonly %A) {
; X64-AVX512-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_loadu_si16:
+; X32-SSE: # %bb.0: # %entry
+; X32-SSE-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_loadu_si16:
+; X32-AVX1: # %bb.0: # %entry
+; X32-AVX1-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07]
+; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_loadu_si16:
+; X32-AVX512: # %bb.0: # %entry
+; X32-AVX512-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07]
+; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%__v.i = bitcast i8* %A to i16*
%0 = load i16, i16* %__v.i, align 1
@@ -2486,6 +2783,18 @@ define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) noun
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_maskmoveu_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: # kill: def $edi killed $edi killed $rdi
+; X32-SSE-NEXT: addr32 maskmovdqu %xmm1, %xmm0 # encoding: [0x67,0x66,0x0f,0xf7,0xc1]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX-LABEL: test_mm_maskmoveu_si128:
+; X32-AVX: # %bb.0:
+; X32-AVX-NEXT: # kill: def $edi killed $edi killed $rdi
+; X32-AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0 # encoding: [0x67,0xc5,0xf9,0xf7,0xc1]
+; X32-AVX-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
@@ -3300,6 +3609,144 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set_epi8:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
+; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
+; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
+; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
+; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
+; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
+; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
+; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
+; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
+; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
+; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set_epi8:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x48]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
+; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
+; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
+; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
+; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
+; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
+; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
+; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
+; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
+; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
+; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
+; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
+; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
+; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
+; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
+; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
+; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set_epi8:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x48]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX512-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
+; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
+; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
+; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
+; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
+; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
+; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
+; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
+; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
+; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
+; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
+; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
+; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
+; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
+; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
+; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X32-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
+; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
%res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
@@ -3450,6 +3897,62 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set_epi16:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10]
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08]
+; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
+; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2]
+; X32-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1]
+; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9]
+; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
+; X32-SSE-NEXT: movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2]
+; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set_epi16:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
+; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
+; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
+; X32-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
+; X32-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
+; X32-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X32-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
+; X32-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
+; X32-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set_epi16:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
+; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
+; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
+; X32-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
+; X32-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
+; X32-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X32-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
+; X32-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
+; X32-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
%res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
%res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
@@ -3528,6 +4031,36 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set_epi32:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
+; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2]
+; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
+; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set_epi32:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X32-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
+; X32-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
+; X32-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set_epi32:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; X32-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
+; X32-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
+; X32-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
@@ -3598,6 +4131,30 @@ define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set_epi64x:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf]
+; X32-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6]
+; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set_epi64x:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
+; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set_epi64x:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+; X32-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
+; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
%res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
ret <2 x i64> %res1
@@ -3652,6 +4209,25 @@ define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0]
+; X32-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x double> undef, double %a1, i32 0
%res1 = insertelement <2 x double> %res0, double %a0, i32 1
ret <2 x double> %res1
@@ -3699,6 +4275,24 @@ define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set_pd1:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set_pd1:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set_pd1:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x double> undef, double %a0, i32 0
%res1 = insertelement <2 x double> %res0, double %a0, i32 1
ret <2 x double> %res1
@@ -3746,6 +4340,24 @@ define <2 x double> @test_mm_set_sd(double %a0) nounwind {
; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set_sd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],zero
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set_sd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0],zero
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set_sd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm0[0],zero
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x double> undef, double %a0, i32 0
%res1 = insertelement <2 x double> %res0, double 0.0, i32 1
ret <2 x double> %res1
@@ -3802,6 +4414,31 @@ define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set1_epi8:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set1_epi8:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
+; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; X32-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set1_epi8:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
%res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
@@ -3871,6 +4508,29 @@ define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set1_epi16:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set1_epi16:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set1_epi16:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
%res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
%res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
@@ -3924,6 +4584,25 @@ define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set1_epi32:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set1_epi32:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set1_epi32:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
@@ -3982,6 +4661,25 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set1_epi64x:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set1_epi64x:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set1_epi64x:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
%res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
ret <2 x i64> %res1
@@ -4029,6 +4727,24 @@ define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_set1_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_set1_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_set1_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x double> undef, double %a0, i32 0
%res1 = insertelement <2 x double> %res0, double %a0, i32 1
ret <2 x double> %res1
@@ -4310,6 +5026,144 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_setr_epi8:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
+; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
+; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
+; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
+; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
+; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
+; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
+; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
+; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
+; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
+; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
+; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
+; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
+; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_setr_epi8:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
+; X32-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
+; X32-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
+; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
+; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
+; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
+; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
+; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
+; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
+; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
+; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
+; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
+; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
+; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
+; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
+; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
+; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
+; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_setr_epi8:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
+; X32-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
+; X32-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
+; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
+; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
+; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
+; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
+; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
+; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
+; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
+; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
+; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
+; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
+; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
+; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
+; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
+; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
+; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
%res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
@@ -4460,6 +5314,62 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
; X64-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_setr_epi16:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
+; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
+; X32-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca]
+; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1]
+; X32-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0]
+; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
+; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
+; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
+; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde]
+; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_setr_epi16:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10]
+; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08]
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
+; X32-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
+; X32-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X32-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
+; X32-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
+; X32-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; X32-AVX1-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_setr_epi16:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10]
+; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08]
+; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
+; X32-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
+; X32-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X32-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
+; X32-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
+; X32-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; X32-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
%res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
%res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
@@ -4538,6 +5448,36 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin
; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_setr_epi32:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
+; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
+; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
+; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
+; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_setr_epi32:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
+; X32-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
+; X32-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_setr_epi32:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
+; X32-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
+; X32-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
@@ -4608,6 +5548,30 @@ define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_setr_epi64x:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
+; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_setr_epi64x:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
+; X32-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
+; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_setr_epi64x:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
+; X32-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
+; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
%res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
ret <2 x i64> %res1
@@ -4661,6 +5625,24 @@ define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_setr_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
+; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_setr_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_setr_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
+; X32-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <2 x double> undef, double %a0, i32 0
%res1 = insertelement <2 x double> %res0, double %a1, i32 1
ret <2 x double> %res1
@@ -5053,6 +6035,21 @@ define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_sqrt_sd_scalar:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_sqrt_sd_scalar:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_sqrt_sd_scalar:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%sqrt = call double @llvm.sqrt.f64(double %a0)
ret double %sqrt
}
@@ -5334,6 +6331,21 @@ define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_store_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_store_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_store_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
store <2 x double> %a1, <2 x double>* %arg0, align 16
ret void
@@ -5384,6 +6396,27 @@ define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_store_pd1:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
+; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_store_pd1:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_store_pd1:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double * %a0 to <2 x double>*
%shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %shuf, <2 x double>* %arg0, align 16
@@ -5423,6 +6456,21 @@ define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_store_sd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_store_sd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_store_sd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ext = extractelement <2 x double> %a1, i32 0
store double %ext, double* %a0, align 1
ret void
@@ -5461,6 +6509,21 @@ define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_store_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_store_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_store_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
store <2 x i64> %a1, <2 x i64>* %a0, align 16
ret void
}
@@ -5510,6 +6573,27 @@ define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_store1_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
+; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_store1_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_store1_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
+; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
+; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double * %a0 to <2 x double>*
%shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %shuf, <2 x double>* %arg0, align 16
@@ -5561,6 +6645,27 @@ define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
; X64-AVX512-NEXT: # xmm0 = xmm0[1,0]
; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storeh_sd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
+; X32-SSE-NEXT: # xmm0 = xmm0[1,1]
+; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storeh_sd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
+; X32-AVX1-NEXT: # xmm0 = xmm0[1,0]
+; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storeh_sd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
+; X32-AVX512-NEXT: # xmm0 = xmm0[1,0]
+; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ext = extractelement <2 x double> %a1, i32 1
store double %ext, double* %a0, align 8
ret void
@@ -5602,6 +6707,24 @@ define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storel_epi64:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
+; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storel_epi64:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storel_epi64:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ext = extractelement <2 x i64> %a1, i32 0
%bc = bitcast <2 x i64> *%a0 to i64*
store i64 %ext, i64* %bc, align 8
@@ -5641,6 +6764,21 @@ define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storel_sd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storel_sd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storel_sd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%ext = extractelement <2 x double> %a1, i32 0
store double %ext, double* %a0, align 8
ret void
@@ -5691,6 +6829,27 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
; X64-AVX512-NEXT: # xmm0 = xmm0[1,0]
; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storer_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
+; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
+; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storer_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
+; X32-AVX1-NEXT: # xmm0 = xmm0[1,0]
+; X32-AVX1-NEXT: vmovapd %xmm0, (%edi) # encoding: [0x67,0xc5,0xf9,0x29,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storer_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
+; X32-AVX512-NEXT: # xmm0 = xmm0[1,0]
+; X32-AVX512-NEXT: vmovapd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf9,0x29,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
%shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
store <2 x double> %shuf, <2 x double>* %arg0, align 16
@@ -5730,6 +6889,21 @@ define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storeu_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storeu_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storeu_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
store <2 x double> %a1, <2 x double>* %arg0, align 1
ret void
@@ -5768,6 +6942,21 @@ define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storeu_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storeu_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storeu_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
store <2 x i64> %a1, <2 x i64>* %a0, align 1
ret void
}
@@ -5808,6 +6997,24 @@ define void @test_mm_storeu_si64(i8* nocapture %A, <2 x i64> %B) {
; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storeu_si64:
+; X32-SSE: # %bb.0: # %entry
+; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
+; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storeu_si64:
+; X32-AVX1: # %bb.0: # %entry
+; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storeu_si64:
+; X32-AVX512: # %bb.0: # %entry
+; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%vecext.i = extractelement <2 x i64> %B, i32 0
%__v.i = bitcast i8* %A to i64*
@@ -5854,6 +7061,24 @@ define void @test_mm_storeu_si32(i8* nocapture %A, <2 x i64> %B) {
; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; X64-AVX512-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storeu_si32:
+; X32-SSE: # %bb.0: # %entry
+; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
+; X32-SSE-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storeu_si32:
+; X32-AVX1: # %bb.0: # %entry
+; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
+; X32-AVX1-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storeu_si32:
+; X32-AVX512: # %bb.0: # %entry
+; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
+; X32-AVX512-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%0 = bitcast <2 x i64> %B to <4 x i32>
%vecext.i = extractelement <4 x i32> %0, i32 0
@@ -5901,6 +7126,24 @@ define void @test_mm_storeu_si16(i8* nocapture %A, <2 x i64> %B) {
; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; X64-AVX512-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_storeu_si16:
+; X32-SSE: # %bb.0: # %entry
+; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
+; X32-SSE-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_storeu_si16:
+; X32-AVX1: # %bb.0: # %entry
+; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
+; X32-AVX1-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_storeu_si16:
+; X32-AVX512: # %bb.0: # %entry
+; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
+; X32-AVX512-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
entry:
%0 = bitcast <2 x i64> %B to <8 x i16>
%vecext.i = extractelement <8 x i16> %0, i32 0
@@ -5942,6 +7185,21 @@ define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_stream_pd:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_stream_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_stream_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
ret void
@@ -5959,6 +7217,11 @@ define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
; X64: # %bb.0:
; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
; X64-NEXT: retq # encoding: [0xc3]
+;
+; X32-LABEL: test_mm_stream_si32:
+; X32: # %bb.0:
+; X32-NEXT: movntil %esi, (%edi) # encoding: [0x67,0x0f,0xc3,0x37]
+; X32-NEXT: retq # encoding: [0xc3]
store i32 %a1, i32* %a0, align 1, !nontemporal !0
ret void
}
@@ -5996,6 +7259,21 @@ define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_stream_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_stream_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_stream_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
ret void
}
diff --git a/llvm/test/MC/X86/maskmovdqu.s b/llvm/test/MC/X86/maskmovdqu.s
new file mode 100644
index 0000000000000..685ee99d770fa
--- /dev/null
+++ b/llvm/test/MC/X86/maskmovdqu.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple i386-- --show-encoding %s |\
+// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
+
+// RUN: llvm-mc -triple i386-- -filetype=obj %s |\
+// RUN: llvm-objdump -d - | FileCheck %s
+
+// CHECK-NOT: addr32
+// CHECK: maskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
+maskmovdqu %xmm1, %xmm0
+
+// CHECK-NOT: addr32
+// CHECK: vmaskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
+vmaskmovdqu %xmm1, %xmm0
diff --git a/llvm/test/MC/X86/maskmovdqu64.s b/llvm/test/MC/X86/maskmovdqu64.s
new file mode 100644
index 0000000000000..ba840a4c9a642
--- /dev/null
+++ b/llvm/test/MC/X86/maskmovdqu64.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple x86_64-- --show-encoding %s |\
+// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
+
+// RUN: llvm-mc -triple x86_64-- -filetype=obj %s |\
+// RUN: llvm-objdump -d - | FileCheck %s
+
+// CHECK-NOT: addr32
+// CHECK: maskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
+maskmovdqu %xmm1, %xmm0
+
+// CHECK-NOT: addr32
+// CHECK: vmaskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
+vmaskmovdqu %xmm1, %xmm0
+
+// CHECK: addr32
+// ENCODING: encoding: [0x67]
+// CHECK: maskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
+addr32 maskmovdqu %xmm1, %xmm0
+
+// CHECK: addr32
+// ENCODING: encoding: [0x67]
+// CHECK: vmaskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
+addr32 vmaskmovdqu %xmm1, %xmm0
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 2d25289608f30..89069ec3e4ff9 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -102,7 +102,8 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_64BIT_ADSIZE:
return (noPrefix && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE, noPrefix));
case IC_64BIT_OPSIZE_ADSIZE:
- return false;
+ return (noPrefix &&
+ inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE, noPrefix));
case IC_XD:
return inheritsFrom(child, IC_64BIT_XD);
case IC_XS:
@@ -123,10 +124,11 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_64BIT_OPSIZE:
return inheritsFrom(child, IC_64BIT_REXW_OPSIZE) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE)) ||
- (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE));
+ (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE)) ||
+ (!AdSize64 && inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE));
case IC_64BIT_XD:
- return(inheritsFrom(child, IC_64BIT_REXW_XD) ||
- (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
+ return (inheritsFrom(child, IC_64BIT_REXW_XD) ||
+ (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
case IC_64BIT_XS:
return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_XS_ADSIZE)));
@@ -156,7 +158,12 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_VEX_OPSIZE:
return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) ||
(VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) ||
- (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE));
+ (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)) ||
+ inheritsFrom(child, IC_64BIT_VEX_OPSIZE);
+ case IC_64BIT_VEX_OPSIZE:
+ return inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE);
+ case IC_64BIT_VEX_OPSIZE_ADSIZE:
+ return false;
case IC_VEX_W:
return VEX_LIG && inheritsFrom(child, IC_VEX_L_W);
case IC_VEX_W_XS:
@@ -881,6 +888,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) {
if (index & ATTR_EVEX)
o << "IC_EVEX";
+ else if ((index & (ATTR_64BIT | ATTR_VEXL | ATTR_REXW | ATTR_OPSIZE)) ==
+ (ATTR_64BIT | ATTR_OPSIZE))
+ o << "IC_64BIT_VEX";
else
o << "IC_VEX";
@@ -892,9 +902,13 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if (index & ATTR_REXW)
o << "_W";
- if (index & ATTR_OPSIZE)
+ if (index & ATTR_OPSIZE) {
o << "_OPSIZE";
- else if (index & ATTR_XD)
+ if ((index & (ATTR_64BIT | ATTR_EVEX | ATTR_VEX | ATTR_VEXL |
+ ATTR_REXW | ATTR_ADSIZE)) ==
+ (ATTR_64BIT | ATTR_VEX | ATTR_ADSIZE))
+ o << "_ADSIZE";
+ } else if (index & ATTR_XD)
o << "_XD";
else if (index & ATTR_XS)
o << "_XS";
@@ -908,8 +922,7 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if (index & ATTR_EVEXB)
o << "_B";
}
- }
- else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
+ } else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
o << "IC_64BIT_REXW_XS";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
o << "IC_64BIT_REXW_XD";
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index e4b7c05cfb881..c2ca3791ac366 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -125,13 +125,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
return;
}
- // Special case since there is no attribute class for 64-bit and VEX
- if (Name == "VMASKMOVDQU64") {
- ShouldBeEmitted = false;
- return;
- }
-
- ShouldBeEmitted = true;
+ ShouldBeEmitted = true;
}
void RecognizableInstr::processInstr(DisassemblerTables &tables,
@@ -267,6 +261,11 @@ InstructionContext RecognizableInstr::insnContext() const {
insnContext = IC_VEX_L_OPSIZE;
else if (OpPrefix == X86Local::PD && HasVEX_W)
insnContext = IC_VEX_W_OPSIZE;
+ else if (OpPrefix == X86Local::PD && Is64Bit &&
+ AdSize == X86Local::AdSize32)
+ insnContext = IC_64BIT_VEX_OPSIZE_ADSIZE;
+ else if (OpPrefix == X86Local::PD && Is64Bit)
+ insnContext = IC_64BIT_VEX_OPSIZE;
else if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_OPSIZE;
else if (HasVEX_LPrefix && OpPrefix == X86Local::XS)