[llvm] r347380 - [X86] In getScalarMaskingNode, replace scalar_to_vector with a bitcast to v8i1 and an extract_subvector to convert i8 to v1i1.
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Tue Nov 20 23:01:22 PST 2018
Author: ctopper
Date: Tue Nov 20 23:01:22 2018
New Revision: 347380
URL: http://llvm.org/viewvc/llvm-project?rev=347380&view=rev
Log:
[X86] In getScalarMaskingNode, replace scalar_to_vector with a bitcast to v8i1 and an extract_subvector to convert i8 to v1i1.
The bitcast can be nicely merged with any i8 loads that exist for argument passing, for example in 32-bit mode.
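A minimal sketch of the before/after node construction (names as in getScalarMaskingNode; see the X86ISelLowering.cpp diff below):

  // Before: wrap the scalar i8 mask directly in a v1i1 vector.
  SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);

  // After: reinterpret the i8 as the 8 bits of a v8i1 mask value, then
  // extract the low element as v1i1. The bitcast can merge with an i8
  // load of the mask (e.g. a stack-passed argument in 32-bit mode).
  SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
                              DAG.getBitcast(MVT::v8i1, Mask),
                              DAG.getIntPtrConstant(0, dl));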
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512-memfold.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512er-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347380&r1=347379&r2=347380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 20 23:01:22 2018
@@ -21238,7 +21238,9 @@ static SDValue getScalarMaskingNode(SDVa
SDLoc dl(Op);
assert(Mask.getValueType() == MVT::i8 && "Unexpect type");
- SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
+ SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
+ DAG.getBitcast(MVT::v8i1, Mask),
+ DAG.getIntPtrConstant(0, dl));
if (Op.getOpcode() == X86ISD::FSETCCM ||
Op.getOpcode() == X86ISD::FSETCCM_RND ||
Op.getOpcode() == X86ISD::VFPCLASSS)
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=347380&r1=347379&r2=347380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Tue Nov 20 23:01:22 2018
@@ -3354,8 +3354,8 @@ declare <2 x double> @llvm.x86.avx512.ma
define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_getexp_sd:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3
; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3
; CHECK-NEXT: vmovapd %xmm2, %xmm4
; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 {%k1}
; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm5 {%k1} {z}
@@ -3393,11 +3393,11 @@ define i8@test_int_x86_avx512_mask_cmp_s
define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %ecx
; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %edx
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %esi
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
@@ -3438,11 +3438,11 @@ define i8@test_int_x86_avx512_mask_cmp_s
define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %ecx
; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %edx
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %esi
; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k0 {%k1}
@@ -3500,8 +3500,8 @@ declare <2 x double> @llvm.x86.avx512.ma
define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT: vmovapd %xmm2, %xmm4
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5 {%k1} {z}
@@ -3525,8 +3525,8 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vaddps %xmm4, %xmm2, %xmm2
@@ -4220,9 +4220,9 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss:
; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm4
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4 {%k1} {z}
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
@@ -4299,9 +4299,9 @@ declare <2 x double> @llvm.x86.avx512.ma
define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd:
; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %xmm0, %xmm4
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4 {%k1}
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
Modified: llvm/trunk/test/CodeGen/X86/avx512-memfold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-memfold.ll?rev=347380&r1=347379&r2=347380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-memfold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-memfold.ll Tue Nov 20 23:01:22 2018
@@ -72,8 +72,8 @@ define <2 x double> @test_int_x86_avx512
define <4 x float> @test_mask_add_ss_double_use(<4 x float> %a, float* %b, i8 %mask, <4 x float> %c) {
; CHECK-LABEL: test_mask_add_ss_double_use:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: vaddss %xmm2, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vaddss %xmm2, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmulps %xmm0, %xmm1, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll?rev=347380&r1=347379&r2=347380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll Tue Nov 20 23:01:22 2018
@@ -668,8 +668,8 @@ define <2 x double>@test_int_x86_avx512_
; X86-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
; X86-AVX512DQ: # %bb.0:
; X86-AVX512DQ-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X86-AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
+; X86-AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X86-AVX512DQ-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xc1,0x04]
; X86-AVX512DQ-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0x58,0xc0]
; X86-AVX512DQ-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0x58,0xc0]
@@ -678,8 +678,8 @@ define <2 x double>@test_int_x86_avx512_
; X86-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
; X86-AVX512DQVL: # %bb.0:
; X86-AVX512DQVL-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X86-AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
+; X86-AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X86-AVX512DQVL-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xc1,0x04]
; X86-AVX512DQVL-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-AVX512DQVL-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
@@ -687,8 +687,8 @@ define <2 x double>@test_int_x86_avx512_
;
; X64-AVX512DQ-LABEL: test_int_x86_avx512_mask_range_sd:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X64-AVX512DQ-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X64-AVX512DQ-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X64-AVX512DQ-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xc1,0x04]
; X64-AVX512DQ-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0x58,0xc0]
@@ -697,8 +697,8 @@ define <2 x double>@test_int_x86_avx512_
;
; X64-AVX512DQVL-LABEL: test_int_x86_avx512_mask_range_sd:
; X64-AVX512DQVL: # %bb.0:
-; X64-AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X64-AVX512DQVL-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x51,0xd9,0x04]
; X64-AVX512DQVL-NEXT: vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x51,0xd1,0x04]
; X64-AVX512DQVL-NEXT: vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x18,0x51,0xc1,0x04]
; X64-AVX512DQVL-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
@@ -752,8 +752,8 @@ declare i8 @llvm.x86.avx512.mask.fpclass
define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfpclasssd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0xc8,0x02]
-; CHECK-NEXT: vfpclasssd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x67,0xc0,0x04]
+; CHECK-NEXT: vfpclasssd $4, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0xc8,0x04]
+; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x67,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
@@ -787,8 +787,8 @@ declare i8 @llvm.x86.avx512.mask.fpclass
define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfpclassss $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0xc8,0x02]
-; CHECK-NEXT: vfpclassss $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x67,0xc0,0x04]
+; CHECK-NEXT: vfpclassss $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0xc8,0x04]
+; CHECK-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x67,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
Modified: llvm/trunk/test/CodeGen/X86/avx512er-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512er-intrinsics.ll?rev=347380&r1=347379&r2=347380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512er-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512er-intrinsics.ll Tue Nov 20 23:01:22 2018
@@ -153,7 +153,7 @@ define <4 x float> @test_rsqrt28_ss_load
define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) {
; X86-LABEL: test_rsqrt28_ss_maskz:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
@@ -170,7 +170,7 @@ define <4 x float> @test_rsqrt28_ss_mask
define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) {
; X86-LABEL: test_rsqrt28_ss_mask:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
@@ -189,7 +189,7 @@ define <4 x float> @test_rsqrt28_ss_mask
define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, <2 x double>* %a1ptr, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_rcp28_sd_mask_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8]
; X86-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
@@ -210,7 +210,7 @@ declare <2 x double> @llvm.x86.avx512.rc
define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, <2 x double>* %a1ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
@@ -228,7 +228,7 @@ define <2 x double> @test_rsqrt28_sd_mas
define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
@@ -245,7 +245,7 @@ define <2 x double> @test_rsqrt28_sd_mas
define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_mask:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
; X86-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
@@ -266,9 +266,9 @@ declare <2 x double> @llvm.x86.avx512.rs
define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_mem:
; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
-; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -286,9 +286,9 @@ define <2 x double> @test_rsqrt28_sd_mas
define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset:
; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl # encoding: [0x8a,0x4c,0x24,0x08]
-; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12]
; X86-NEXT: retl # encoding: [0xc3]
;