[llvm] 889911e - [X86][AVX] Eliminate redundant movzbl instruction.
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 20 16:20:14 PDT 2022
Author: Luo, Yuanke
Date: 2022-10-21T07:17:51+08:00
New Revision: 889911ec268a342ec5da0e343eda85233fb62f5a
URL: https://github.com/llvm/llvm-project/commit/889911ec268a342ec5da0e343eda85233fb62f5a
DIFF: https://github.com/llvm/llvm-project/commit/889911ec268a342ec5da0e343eda85233fb62f5a.diff
LOG: [X86][AVX] Eliminate redundant movzbl instruction.
The movzbl instruction can be folded into vpinsrb or vmovd when it is
actually lowered from an anyext, since the zero extension of the upper
bits is redundant in that case.
Differential Revision: https://reviews.llvm.org/D130953
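
As a minimal illustration, condensed from the
llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll change below (the
simplified IR and function name here are illustrative, not taken from the
test verbatim): inserting an i8 argument into a vector goes through an
anyext to i32, which previously materialized a movzbl.

  define <16 x i8> @insert_byte(<16 x i8> %v, i8 %x) {
    %r = insertelement <16 x i8> %v, i8 %x, i32 1
    ret <16 x i8> %r
  }

  ; before this patch:                 after this patch:
  ;   movzbl %dil, %eax                  vpinsrb $1, %edi, %xmm0, %xmm0
  ;   vpinsrb $1, %eax, %xmm0, %xmm0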
Added:
Modified:
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/load-scalar-as-vector.ll
llvm/test/CodeGen/X86/pr15267.ll
llvm/test/CodeGen/X86/setcc-lowering.ll
llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b0996c75222e7..2cf4f3ac67f23 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4736,6 +4736,9 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
}
let Predicates = [HasAVX512] in {
+ def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
+ (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIZrr GR32:$src)>;
@@ -11714,6 +11717,25 @@ defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
+let Predicates = [HasAVX512, NoBWI] in {
+ def : Pat<(X86pinsrb VR128:$src1,
+ (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
+ timm:$src3),
+ (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
+ timm:$src3)>;
+}
+
+let Predicates = [HasBWI] in {
+ def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
+ (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src2, sub_8bit), timm:$src3)>;
+ def : Pat<(X86pinsrb VR128:$src1,
+ (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
+ timm:$src3),
+ (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
+ timm:$src3)>;
+}
+
// Always select FP16 instructions if available.
let Predicates = [HasBWI], AddedComplexity = -10 in {
def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c5557bd5df4e4..c792270280e2b 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4244,6 +4244,9 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
let Predicates = [UseAVX] in {
+ def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
+ (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIrr GR32:$src)>;
@@ -5353,8 +5356,13 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
-let Predicates = [HasAVX, NoBWI] in
+let Predicates = [HasAVX, NoBWI] in {
defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
+ def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
+ (VPINSRBrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src2, sub_8bit), timm:$src3)>;
+}
+
let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index 40bb10bcd27f6..0a89bea2e6ce8 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -715,8 +715,7 @@ define <4 x i64> @test_mm256_insert_epi8(<4 x i64> %a0, i8 %a1) nounwind {
;
; X64-LABEL: test_mm256_insert_epi8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm1
+; X64-NEXT: vpinsrb $4, %edi, %xmm0, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
@@ -1418,8 +1417,8 @@ define <4 x i64> @test_mm256_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8
; X86-LABEL: test_mm256_set_epi8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm0
+; X86-NEXT: vmovd %eax, %xmm0
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -1450,8 +1449,8 @@ define <4 x i64> @test_mm256_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm1
+; X86-NEXT: vmovd %eax, %xmm1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
@@ -1487,8 +1486,8 @@ define <4 x i64> @test_mm256_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8
; X64-LABEL: test_mm256_set_epi8:
; X64: # %bb.0:
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
-; X64-NEXT: vmovd %r10d, %xmm0
+; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -1506,21 +1505,15 @@ define <4 x i64> @test_mm256_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8
; X64-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %r9b, %eax
-; X64-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %r8b, %eax
-; X64-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: vmovd %ecx, %xmm1
; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
@@ -1859,8 +1852,7 @@ define <4 x i64> @test_mm256_set1_epi8(i8 %a0) nounwind {
;
; X64-LABEL: test_mm256_set1_epi8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -2034,8 +2026,8 @@ define <4 x i64> @test_mm256_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i
; X86-LABEL: test_mm256_setr_epi8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm0
+; X86-NEXT: vmovd %eax, %xmm0
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -2066,8 +2058,8 @@ define <4 x i64> @test_mm256_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm1
+; X86-NEXT: vmovd %eax, %xmm1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
@@ -2103,8 +2095,8 @@ define <4 x i64> @test_mm256_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i
; X64-LABEL: test_mm256_setr_epi8:
; X64: # %bb.0:
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
-; X64-NEXT: vmovd %r10d, %xmm0
+; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -2134,18 +2126,12 @@ define <4 x i64> @test_mm256_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i
; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: movzbl %dil, %esi
-; X64-NEXT: vmovd %esi, %xmm1
-; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %r8b, %eax
-; X64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %r9b, %eax
-; X64-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; X64-NEXT: vmovd %edi, %xmm1
+; X64-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $4, %r8d, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $5, %r9d, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
index fa0e6648b8712..13b07532ceea4 100644
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -100,7 +100,6 @@ define <16 x i8> @sub_op1_constant(ptr %p) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: addb $-42, %al
-; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%x = load i8, ptr %p
@@ -242,7 +241,6 @@ define <16 x i8> @shl_op1_constant(ptr %p) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: shlb $5, %al
-; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%x = load i8, ptr %p
@@ -542,7 +540,6 @@ define <16 x i8> @urem_op1_constant(ptr %p) nounwind {
; AVX-NEXT: shrl $10, %ecx
; AVX-NEXT: imull $42, %ecx, %ecx
; AVX-NEXT: subb %cl, %al
-; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%x = load i8, ptr %p
diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
index 38107284509e0..9750ddf39a000 100644
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -31,19 +31,15 @@ define <4 x i1> @test2(ptr %in) nounwind {
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrb %cl
; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: andb $1, %dl
-; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrb $2, %cl
; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; CHECK-NEXT: shrb $3, %al
-; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; CHECK-NEXT: retq
%ret = load <4 x i1>, ptr %in, align 1
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index f740c59250905..705e48ca4c9c9 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -44,7 +44,6 @@ define void @pr26232(i64 %a, <16 x i1> %b) {
; AVX-NEXT: .p2align 4, 0x90
; AVX-NEXT: .LBB1_1: # %for_loop599
; AVX-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpq $65536, %rdi # imm = 0x10000
; AVX-NEXT: setl %al
; AVX-NEXT: vmovd %eax, %xmm2
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index c0f0b1f3d43d9..6d5b45bcf41da 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -3395,9 +3395,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
;
; X86-AVX1-LABEL: test_mm_set_epi8:
; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
+; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
-; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
-; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3431,9 +3431,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
;
; X86-AVX512-LABEL: test_mm_set_epi8:
; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
+; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
-; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
-; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3533,9 +3533,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
;
; X64-AVX1-LABEL: test_mm_set_epi8:
; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
+; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
-; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X64-AVX1-NEXT: vmovd %r10d, %xmm0 # encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3553,25 +3553,19 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X64-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X64-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X64-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X64-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X64-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X64-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi8:
; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
+; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
-; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X64-AVX512-NEXT: vmovd %r10d, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3589,18 +3583,12 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X64-AVX512-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X64-AVX512-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X64-AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X64-AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X64-AVX512-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X64-AVX512-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
;
; X32-SSE-LABEL: test_mm_set_epi8:
@@ -3671,9 +3659,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
;
; X32-AVX1-LABEL: test_mm_set_epi8:
; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
-; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X32-AVX1-NEXT: vmovd %r10d, %xmm0 # encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3691,25 +3679,19 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X32-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X32-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X32-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X32-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X32-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
; X32-AVX1-NEXT: retq # encoding: [0xc3]
;
; X32-AVX512-LABEL: test_mm_set_epi8:
; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
-; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X32-AVX512-NEXT: vmovd %r10d, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3727,18 +3709,12 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X32-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX512-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X32-AVX512-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X32-AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X32-AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X32-AVX512-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X32-AVX512-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
; X32-AVX512-NEXT: retq # encoding: [0xc3]
%res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
@@ -4397,8 +4373,7 @@ define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
;
; X64-AVX1-LABEL: test_mm_set1_epi8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
@@ -4422,8 +4397,7 @@ define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
;
; X32-AVX1-LABEL: test_mm_set1_epi8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X32-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X32-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X32-AVX1-NEXT: retq # encoding: [0xc3]
@@ -4812,9 +4786,9 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
;
; X86-AVX1-LABEL: test_mm_setr_epi8:
; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
-; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -4848,9 +4822,9 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
;
; X86-AVX512-LABEL: test_mm_setr_epi8:
; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
-; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -4950,18 +4924,12 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
;
; X64-AVX1-LABEL: test_mm_setr_epi8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X64-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
-; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X64-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X64-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X64-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X64-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X64-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
@@ -4986,18 +4954,12 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
;
; X64-AVX512-LABEL: test_mm_setr_epi8:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X64-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
-; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; X64-AVX512-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X64-AVX512-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X64-AVX512-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X64-AVX512-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
@@ -5088,18 +5050,12 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
;
; X32-AVX1-LABEL: test_mm_setr_epi8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X32-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
-; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X32-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X32-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X32-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X32-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
@@ -5124,18 +5080,12 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
;
; X32-AVX512-LABEL: test_mm_setr_epi8:
; X32-AVX512: # %bb.0:
-; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X32-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
-; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX512-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X32-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X32-AVX512-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X32-AVX512-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X32-AVX512-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
index 7c272d72db42d..47630501864a5 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
@@ -574,8 +574,7 @@ define <2 x i64> @test_mm_insert_epi8(<2 x i64> %a0, i8 %a1) {
;
; X64-AVX-LABEL: test_mm_insert_epi8:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: movzbl %dil, %eax
-; X64-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; X64-AVX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0
; X64-AVX-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%res = insertelement <16 x i8> %arg0, i8 %a1,i32 1