[llvm] r333749 - [Utils][X86] Help update_llc_test_checks.py to recognise retl/retq to reduce CHECK duplication (PR35003)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 1 06:37:01 PDT 2018
Author: rksimon
Date: Fri Jun 1 06:37:01 2018
New Revision: 333749
URL: http://llvm.org/viewvc/llvm-project?rev=333749&view=rev
Log:
[Utils][X86] Help update_llc_test_checks.py to recognise retl/retq to reduce CHECK duplication (PR35003)
This patch replaces the --x86_extra_scrub command line argument and instead automatically applies a second level of regex scrubbing whenever it improves the matching of nearly-identical code patterns. The '--extra_scrub' argument is now available to force the extra scrubbing when required.
This is mostly useful for sharing 32-bit/64-bit x86 vector test checks that differ only by their retl/retq instructions, but any scrubber can now technically support this, meaning test checks no longer have to be needlessly obfuscated.
I've updated some of the existing checks that had been manually generated with --x86_extra_scrub, to demonstrate that the extra "ret{{[l|q]}}" scrub now only happens when it is useful, and re-run the sse42-intrinsics file to show the extra matches - most sse/avx intrinsics files should now be able to share 32/64-bit checks.
Tested with the opt/analysis scripts as well, which share the common code - AFAICT the other update scripts use their own versions.
Differential Revision: https://reviews.llvm.org/D47485
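For illustration, here is a minimal Python sketch of the two-level scrub idea (an approximation only - the names below are hypothetical and the real logic lives in the UpdateTestChecks/common.py and asm.py changes further down): each function body is scrubbed twice, and the extra ret{{[l|q]}} form is only kept when it lets two otherwise-conflicting RUN lines share a single set of checks.

import re

# Hypothetical helpers for illustration; names do not match the patch.
RET_RE = re.compile(r'\bret[lq]\b')

def scrub(body, extra=False):
    # The real scrubbers also strip comments, whitespace, etc.; here we only
    # model the optional second-level ret scrub.
    return RET_RE.sub('ret{{[l|q]}}', body) if extra else body

class FunctionBody(object):
    def __init__(self, scrubbed, extrascrubbed):
        self.scrub = scrubbed            # checks emitted by default
        self.extrascrub = extrascrubbed  # fallback used to merge RUN lines

def record(bodies, func, body):
    # Record one function body seen under a single check prefix.
    scrubbed = scrub(body, extra=False)
    scrubbed_extra = scrub(body, extra=True)
    old = bodies.get(func)
    if old is None:
        bodies[func] = FunctionBody(scrubbed, scrubbed_extra)
    elif old.scrub != scrubbed:
        if old.extrascrub == scrubbed_extra:
            # The bodies differ only by retl/retq: fall back to the shared,
            # extra-scrubbed form so one CHECK block covers both RUN lines.
            old.scrub = scrubbed_extra
        else:
            print('conflicting asm for %r under the same prefix' % func)

# Two RUN lines whose output differs only in the ret instruction:
bodies = {}
record(bodies, 'test_mm_cmpgt_epi64', 'pcmpgtq %xmm1, %xmm0\nretl')
record(bodies, 'test_mm_cmpgt_epi64', 'pcmpgtq %xmm1, %xmm0\nretq')
print(bodies['test_mm_cmpgt_epi64'].scrub)  # pcmpgtq %xmm1, %xmm0 / ret{{[l|q]}}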
Modified:
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/packss.ll
llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
llvm/trunk/utils/UpdateTestChecks/asm.py
llvm/trunk/utils/UpdateTestChecks/common.py
llvm/trunk/utils/update_llc_test_checks.py
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll Fri Jun 1 06:37:01 2018
@@ -101,12 +101,12 @@ define <4 x double> @test_x86_avx_vbroad
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X64: # %bb.0:
; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -118,12 +118,12 @@ define <8 x float> @test_x86_avx_vbroadc
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X64: # %bb.0:
; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -402,14 +402,14 @@ define void @test_x86_sse2_storeu_dq(i8*
; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovdqu %xmm0, (%eax)
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_sse2_storeu_dq:
; X64: # %bb.0:
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovdqu %xmm0, (%rdi)
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
ret void
@@ -426,7 +426,7 @@ define void @test_x86_sse2_storeu_pd(i8*
; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovupd %xmm0, (%eax)
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_sse2_storeu_pd:
; X64: # %bb.0:
@@ -434,7 +434,7 @@ define void @test_x86_sse2_storeu_pd(i8*
; X64-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovupd %xmm0, (%rdi)
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void
@@ -447,12 +447,12 @@ define void @test_x86_sse_storeu_ps(i8*
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovups %xmm0, (%eax)
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_sse_storeu_ps:
; X64: # %bb.0:
; X64-NEXT: vmovups %xmm0, (%rdi)
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
ret void
}
@@ -472,7 +472,7 @@ define void @test_x86_avx_storeu_dq_256(
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: vmovups %ymm0, (%eax)
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_avx_storeu_dq_256:
; X64: # %bb.0:
@@ -483,7 +483,7 @@ define void @test_x86_avx_storeu_dq_256(
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vmovups %ymm0, (%rdi)
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
ret void
@@ -500,7 +500,7 @@ define void @test_x86_avx_storeu_pd_256(
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-NEXT: vmovupd %ymm0, (%eax)
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_avx_storeu_pd_256:
; X64: # %bb.0:
@@ -508,7 +508,7 @@ define void @test_x86_avx_storeu_pd_256(
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT: vmovupd %ymm0, (%rdi)
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
ret void
@@ -522,13 +522,13 @@ define void @test_x86_avx_storeu_ps_256(
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovups %ymm0, (%eax)
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_x86_avx_storeu_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovups %ymm0, (%rdi)
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
ret void
}
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Fri Jun 1 06:37:01 2018
@@ -287,12 +287,12 @@ define <32 x i8> @test_x86_avx_ldu_dq_25
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_ldu_dq_256:
; X64: # %bb.0:
; X64-NEXT: vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
ret <32 x i8> %res
}
@@ -304,12 +304,12 @@ define <2 x double> @test_x86_avx_masklo
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_pd:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -321,12 +321,12 @@ define <4 x double> @test_x86_avx_masklo
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -338,12 +338,12 @@ define <4 x float> @test_x86_avx_maskloa
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_ps:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -355,12 +355,12 @@ define <8 x float> @test_x86_avx_maskloa
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskload_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -372,12 +372,12 @@ define void @test_x86_avx_maskstore_pd(i
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_pd:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
ret void
}
@@ -390,13 +390,13 @@ define void @test_x86_avx_maskstore_pd_2
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2f,0x0f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
ret void
}
@@ -408,12 +408,12 @@ define void @test_x86_avx_maskstore_ps(i
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_ps:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
ret void
}
@@ -426,13 +426,13 @@ define void @test_x86_avx_maskstore_ps_2
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx_maskstore_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
ret void
}
@@ -720,23 +720,23 @@ define <4 x float> @test_x86_avx_vpermil
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00]
-; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00]
-; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07]
-; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07]
-; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%a2 = load <4 x i32>, <4 x i32>* %a1
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -951,7 +951,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
; X86-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; X86-AVX-NEXT: vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_dq:
; X86-AVX512VL: # %bb.0:
@@ -960,7 +960,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
; X86-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; X86-AVX512VL-NEXT: vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_dq:
; X64-AVX: # %bb.0:
@@ -968,7 +968,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
; X64-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; X64-AVX-NEXT: vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_dq:
; X64-AVX512VL: # %bb.0:
@@ -976,7 +976,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
; X64-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; X64-AVX512VL-NEXT: vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
%a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
@@ -990,26 +990,26 @@ define void @movnt_ps(i8* %p, <8 x float
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]
; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_ps:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]
; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07]
; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_ps:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
ret void
}
@@ -1024,7 +1024,7 @@ define void @movnt_pd(i8* %p, <4 x doubl
; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_pd:
; X86-AVX512VL: # %bb.0:
@@ -1033,7 +1033,7 @@ define void @movnt_pd(i8* %p, <4 x doubl
; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_pd:
; X64-AVX: # %bb.0:
@@ -1041,7 +1041,7 @@ define void @movnt_pd(i8* %p, <4 x doubl
; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX-NEXT: vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07]
; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_pd:
; X64-AVX512VL: # %bb.0:
@@ -1049,7 +1049,7 @@ define void @movnt_pd(i8* %p, <4 x doubl
; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX512VL-NEXT: vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
ret void
Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll Fri Jun 1 06:37:01 2018
@@ -367,12 +367,12 @@ define <4 x i64> @test_mm256_broadcastsi
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_broadcastsi128_si256_mem:
; X64: # %bb.0:
; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%a0 = load <2 x i64>, <2 x i64>* %p0
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x i64> %res
@@ -766,7 +766,7 @@ define <2 x i64> @test_mm_i32gather_epi3
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i32gather_epi32:
; X64: # %bb.0:
@@ -774,7 +774,7 @@ define <2 x i64> @test_mm_i32gather_epi3
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i32 *%a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
@@ -789,12 +789,12 @@ define <2 x i64> @test_mm_mask_i32gather
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i32gather_epi32:
; X64: # %bb.0:
; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast i32 *%a1 to i8*
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -812,7 +812,7 @@ define <4 x i64> @test_mm256_i32gather_e
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT: vmovdqa %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i32gather_epi32:
; X64: # %bb.0:
@@ -820,7 +820,7 @@ define <4 x i64> @test_mm256_i32gather_e
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovdqa %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i32 *%a0 to i8*
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i32>
@@ -835,12 +835,12 @@ define <4 x i64> @test_mm256_mask_i32gat
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i32gather_epi32:
; X64: # %bb.0:
; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%arg1 = bitcast i32 *%a1 to i8*
%arg2 = bitcast <4 x i64> %a2 to <8 x i32>
@@ -858,7 +858,7 @@ define <2 x i64> @test_mm_i32gather_epi6
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i32gather_epi64:
; X64: # %bb.0:
@@ -866,7 +866,7 @@ define <2 x i64> @test_mm_i32gather_epi6
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64 *%a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> undef, i8* %arg0, <4 x i32> %arg1, <2 x i64> <i64 -1, i64 -1>, i8 2)
@@ -879,12 +879,12 @@ define <2 x i64> @test_mm_mask_i32gather
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i32gather_epi64:
; X64: # %bb.0:
; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast i64 *%a1 to i8*
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
%res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <2 x i64> %a3, i8 2)
@@ -899,7 +899,7 @@ define <4 x i64> @test_mm256_i32gather_e
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1
; X86-NEXT: vmovdqa %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i32gather_epi64:
; X64: # %bb.0:
@@ -907,7 +907,7 @@ define <4 x i64> @test_mm256_i32gather_e
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1
; X64-NEXT: vmovdqa %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64 *%a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8* %arg0, <4 x i32> %arg1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
@@ -920,12 +920,12 @@ define <4 x i64> @test_mm256_mask_i32gat
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i32gather_epi64:
; X64: # %bb.0:
; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast i64 *%a1 to i8*
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
%res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <4 x i64> %a3, i8 2)
@@ -940,7 +940,7 @@ define <2 x double> @test_mm_i32gather_p
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovapd %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i32gather_pd:
; X64: # %bb.0:
@@ -948,7 +948,7 @@ define <2 x double> @test_mm_i32gather_p
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovapd %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast double *%a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
@@ -964,12 +964,12 @@ define <2 x double> @test_mm_mask_i32gat
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i32gather_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast double *%a1 to i8*
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
%res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %arg1, <4 x i32> %arg2, <2 x double> %a3, i8 2)
@@ -984,7 +984,7 @@ define <4 x double> @test_mm256_i32gathe
; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1
; X86-NEXT: vmovapd %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i32gather_pd:
; X64: # %bb.0:
@@ -992,7 +992,7 @@ define <4 x double> @test_mm256_i32gathe
; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1
; X64-NEXT: vmovapd %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast double *%a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
@@ -1006,12 +1006,12 @@ define <4 x double> @test_mm256_mask_i32
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i32gather_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast double *%a1 to i8*
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
%res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %arg1, <4 x i32> %arg2, <4 x double> %a3, i8 2)
@@ -1026,7 +1026,7 @@ define <4 x float> @test_mm_i32gather_ps
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovaps %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i32gather_ps:
; X64: # %bb.0:
@@ -1034,7 +1034,7 @@ define <4 x float> @test_mm_i32gather_ps
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovaps %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast float *%a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
@@ -1050,12 +1050,12 @@ define <4 x float> @test_mm_mask_i32gath
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i32gather_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast float *%a1 to i8*
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
%call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %arg1, <4 x i32> %arg2, <4 x float> %a3, i8 2)
@@ -1070,7 +1070,7 @@ define <8 x float> @test_mm256_i32gather
; X86-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT: vmovaps %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i32gather_ps:
; X64: # %bb.0:
@@ -1078,7 +1078,7 @@ define <8 x float> @test_mm256_i32gather
; X64-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovaps %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast float *%a0 to i8*
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%mask = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i8 0)
@@ -1092,12 +1092,12 @@ define <8 x float> @test_mm256_mask_i32g
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i32gather_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast float *%a1 to i8*
%arg2 = bitcast <4 x i64> %a2 to <8 x i32>
%call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %arg1, <8 x i32> %arg2, <8 x float> %a3, i8 2)
@@ -1112,7 +1112,7 @@ define <2 x i64> @test_mm_i64gather_epi3
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i64gather_epi32:
; X64: # %bb.0:
@@ -1120,7 +1120,7 @@ define <2 x i64> @test_mm_i64gather_epi3
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i32 *%a0 to i8*
%mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
%call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> undef, i8* %arg0, <2 x i64> %a1, <4 x i32> %mask, i8 2)
@@ -1134,12 +1134,12 @@ define <2 x i64> @test_mm_mask_i64gather
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i64gather_epi32:
; X64: # %bb.0:
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast i32 *%a1 to i8*
%arg3 = bitcast <2 x i64> %a3 to <4 x i32>
@@ -1157,7 +1157,7 @@ define <2 x i64> @test_mm256_i64gather_e
; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm0,2), %xmm1
; X86-NEXT: vmovdqa %xmm1, %xmm0
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i64gather_epi32:
; X64: # %bb.0:
@@ -1166,7 +1166,7 @@ define <2 x i64> @test_mm256_i64gather_e
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm0,2), %xmm1
; X64-NEXT: vmovdqa %xmm1, %xmm0
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i32 *%a0 to i8*
%mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
%call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8* %arg0, <4 x i64> %a1, <4 x i32> %mask, i8 2)
@@ -1181,13 +1181,13 @@ define <2 x i64> @test_mm256_mask_i64gat
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i64gather_epi32:
; X64: # %bb.0:
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast i32 *%a1 to i8*
%arg3 = bitcast <2 x i64> %a3 to <4 x i32>
@@ -1204,7 +1204,7 @@ define <2 x i64> @test_mm_i64gather_epi6
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i64gather_epi64:
; X64: # %bb.0:
@@ -1212,7 +1212,7 @@ define <2 x i64> @test_mm_i64gather_epi6
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64 *%a0 to i8*
%call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> undef, i8* %arg0, <2 x i64> %a1, <2 x i64> <i64 -1, i64 -1>, i8 2)
ret <2 x i64> %call
@@ -1224,12 +1224,12 @@ define <2 x i64> @test_mm_mask_i64gather
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i64gather_epi64:
; X64: # %bb.0:
; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast i64 *%a1 to i8*
%call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %arg1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
ret <2 x i64> %call
@@ -1243,7 +1243,7 @@ define <4 x i64> @test_mm256_i64gather_e
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT: vmovdqa %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i64gather_epi64:
; X64: # %bb.0:
@@ -1251,7 +1251,7 @@ define <4 x i64> @test_mm256_i64gather_e
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovdqa %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64 *%a0 to i8*
%call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8* %arg0, <4 x i64> %a1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
ret <4 x i64> %call
@@ -1263,12 +1263,12 @@ define <4 x i64> @test_mm256_mask_i64gat
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i64gather_epi64:
; X64: # %bb.0:
; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast i64 *%a1 to i8*
%call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %arg1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
ret <4 x i64> %call
@@ -1282,7 +1282,7 @@ define <2 x double> @test_mm_i64gather_p
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovapd %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i64gather_pd:
; X64: # %bb.0:
@@ -1290,7 +1290,7 @@ define <2 x double> @test_mm_i64gather_p
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovapd %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast double *%a0 to i8*
%cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
%sext = sext <2 x i1> %cmp to <2 x i64>
@@ -1305,12 +1305,12 @@ define <2 x double> @test_mm_mask_i64gat
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i64gather_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast double *%a1 to i8*
%call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %arg1, <2 x i64> %a2, <2 x double> %a3, i8 2)
ret <2 x double> %call
@@ -1324,7 +1324,7 @@ define <4 x double> @test_mm256_i64gathe
; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1
; X86-NEXT: vmovapd %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i64gather_pd:
; X64: # %bb.0:
@@ -1332,7 +1332,7 @@ define <4 x double> @test_mm256_i64gathe
; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1
; X64-NEXT: vmovapd %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast double *%a0 to i8*
%mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
%call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %arg0, <4 x i64> %a1, <4 x double> %mask, i8 2)
@@ -1345,12 +1345,12 @@ define <4 x double> @test_mm256_mask_i64
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i64gather_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast i64 *%a1 to i8*
%call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %arg1, <4 x i64> %a2, <4 x double> %a3, i8 2)
ret <4 x double> %call
@@ -1364,7 +1364,7 @@ define <4 x float> @test_mm_i64gather_ps
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm0,2), %xmm1
; X86-NEXT: vmovaps %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_i64gather_ps:
; X64: # %bb.0:
@@ -1372,7 +1372,7 @@ define <4 x float> @test_mm_i64gather_ps
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT: vmovaps %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast float *%a0 to i8*
%cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -1387,12 +1387,12 @@ define <4 x float> @test_mm_mask_i64gath
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_i64gather_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast float *%a1 to i8*
%call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %arg1, <2 x i64> %a2, <4 x float> %a3, i8 2)
ret <4 x float> %call
@@ -1407,7 +1407,7 @@ define <4 x float> @test_mm256_i64gather
; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm0,2), %xmm1
; X86-NEXT: vmovaps %xmm1, %xmm0
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_i64gather_ps:
; X64: # %bb.0:
@@ -1416,7 +1416,7 @@ define <4 x float> @test_mm256_i64gather
; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm0,2), %xmm1
; X64-NEXT: vmovaps %xmm1, %xmm0
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast float *%a0 to i8*
%cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -1432,13 +1432,13 @@ define <4 x float> @test_mm256_mask_i64g
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_i64gather_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg1 = bitcast float *%a1 to i8*
%call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %arg1, <4 x i64> %a2, <4 x float> %a3, i8 2)
ret <4 x float> %call
@@ -1496,12 +1496,12 @@ define <2 x i64> @test_mm_maskload_epi32
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovd (%eax), %xmm0, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskload_epi32:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i32* %a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%call = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %arg0, <4 x i32> %arg1)
@@ -1515,12 +1515,12 @@ define <4 x i64> @test_mm256_maskload_ep
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovd (%eax), %ymm0, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskload_epi32:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i32* %a0 to i8*
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%call = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %arg0, <8 x i32> %arg1)
@@ -1534,12 +1534,12 @@ define <2 x i64> @test_mm_maskload_epi64
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovq (%eax), %xmm0, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskload_epi64:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64* %a0 to i8*
%res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %arg0, <2 x i64> %a1)
ret <2 x i64> %res
@@ -1551,12 +1551,12 @@ define <4 x i64> @test_mm256_maskload_ep
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovq (%eax), %ymm0, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskload_epi64:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64* %a0 to i8*
%res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %arg0, <4 x i64> %a1)
ret <4 x i64> %res
@@ -1568,12 +1568,12 @@ define void @test_mm_maskstore_epi32(flo
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovd %xmm1, %xmm0, (%eax)
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskstore_epi32:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast float* %a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -1588,13 +1588,13 @@ define void @test_mm256_maskstore_epi32(
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovd %ymm1, %ymm0, (%eax)
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskstore_epi32:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast float* %a0 to i8*
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%arg2 = bitcast <4 x i64> %a2 to <8 x i32>
@@ -1608,12 +1608,12 @@ define void @test_mm_maskstore_epi64(i64
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovq %xmm1, %xmm0, (%eax)
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskstore_epi64:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi)
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64* %a0 to i8*
call void @llvm.x86.avx2.maskstore.q(i8* %arg0, <2 x i64> %a1, <2 x i64> %a2)
ret void
@@ -1626,13 +1626,13 @@ define void @test_mm256_maskstore_epi64(
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpmaskmovq %ymm1, %ymm0, (%eax)
; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskstore_epi64:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi)
; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast i64* %a0 to i8*
call void @llvm.x86.avx2.maskstore.q.256(i8* %arg0, <4 x i64> %a1, <4 x i64> %a2)
ret void
@@ -2465,12 +2465,12 @@ define <4 x i64> @test_mm256_stream_load
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovntdqa (%eax), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
;
; X64-LABEL: test_mm256_stream_load_si256:
; X64: # %bb.0:
; X64-NEXT: vmovntdqa (%rdi), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
%arg0 = bitcast <4 x i64> *%a0 to i8*
%res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %arg0)
ret <4 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/packss.ll?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/packss.ll Fri Jun 1 06:37:01 2018
@@ -74,28 +74,28 @@ define <8 x i16> @trunc_ashr_v4i32_icmp_
; X86-SSE-NEXT: psrad $31, %xmm0
; X86-SSE-NEXT: pcmpgtd {{\.LCPI.*}}, %xmm1
; X86-SSE-NEXT: packssdw %xmm1, %xmm0
-; X86-SSE-NEXT: ret{{[l|q]}}
+; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; X86-AVX-NEXT: vpcmpgtd {{\.LCPI.*}}, %xmm1, %xmm1
; X86-AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: ret{{[l|q]}}
+; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: psrad $31, %xmm0
; X64-SSE-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
; X64-SSE-NEXT: packssdw %xmm1, %xmm0
-; X64-SSE-NEXT: ret{{[l|q]}}
+; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; X64-AVX-NEXT: vpcmpgtd {{.*}}(%rip), %xmm1, %xmm1
; X64-AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: ret{{[l|q]}}
+; X64-AVX-NEXT: retq
%1 = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
%2 = icmp sgt <4 x i32> %b, <i32 1, i32 16, i32 255, i32 65535>
%3 = sext <4 x i1> %2 to <4 x i32>
Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll Fri Jun 1 06:37:01 2018
@@ -195,34 +195,22 @@ define i32 @test_mm_cmpestrz(<2 x i64> %
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
define <2 x i64> @test_mm_cmpgt_epi64(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpgt_epi64:
-; X32: # %bb.0:
-; X32-NEXT: pcmpgtq %xmm1, %xmm0
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpgt_epi64:
-; X64: # %bb.0:
-; X64-NEXT: pcmpgtq %xmm1, %xmm0
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpgt_epi64:
+; ALL: # %bb.0:
+; ALL-NEXT: pcmpgtq %xmm1, %xmm0
+; ALL-NEXT: ret{{[l|q]}}
%cmp = icmp sgt <2 x i64> %a0, %a1
%res = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %res
}
define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpistra:
-; X32: # %bb.0:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X32-NEXT: seta %al
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpistra:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X64-NEXT: seta %al
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpistra:
+; ALL: # %bb.0:
+; ALL-NEXT: xorl %eax, %eax
+; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
+; ALL-NEXT: seta %al
+; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
@@ -231,19 +219,12 @@ define i32 @test_mm_cmpistra(<2 x i64> %
declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpistrc:
-; X32: # %bb.0:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X32-NEXT: setb %al
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpistrc:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X64-NEXT: setb %al
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpistrc:
+; ALL: # %bb.0:
+; ALL-NEXT: xorl %eax, %eax
+; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
+; ALL-NEXT: setb %al
+; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
@@ -252,17 +233,11 @@ define i32 @test_mm_cmpistrc(<2 x i64> %
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
define i32 @test_mm_cmpistri(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpistri:
-; X32: # %bb.0:
-; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpistri:
-; X64: # %bb.0:
-; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpistri:
+; ALL: # %bb.0:
+; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
+; ALL-NEXT: movl %ecx, %eax
+; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
@@ -271,15 +246,10 @@ define i32 @test_mm_cmpistri(<2 x i64> %
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <2 x i64> @test_mm_cmpistrm(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpistrm:
-; X32: # %bb.0:
-; X32-NEXT: pcmpistrm $7, %xmm1, %xmm0
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpistrm:
-; X64: # %bb.0:
-; X64-NEXT: pcmpistrm $7, %xmm1, %xmm0
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpistrm:
+; ALL: # %bb.0:
+; ALL-NEXT: pcmpistrm $7, %xmm1, %xmm0
+; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
@@ -289,19 +259,12 @@ define <2 x i64> @test_mm_cmpistrm(<2 x
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpistro:
-; X32: # %bb.0:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X32-NEXT: seto %al
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpistro:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X64-NEXT: seto %al
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpistro:
+; ALL: # %bb.0:
+; ALL-NEXT: xorl %eax, %eax
+; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
+; ALL-NEXT: seto %al
+; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
@@ -310,19 +273,12 @@ define i32 @test_mm_cmpistro(<2 x i64> %
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpistrs:
-; X32: # %bb.0:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X32-NEXT: sets %al
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpistrs:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X64-NEXT: sets %al
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpistrs:
+; ALL: # %bb.0:
+; ALL-NEXT: xorl %eax, %eax
+; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
+; ALL-NEXT: sets %al
+; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
@@ -331,19 +287,12 @@ define i32 @test_mm_cmpistrs(<2 x i64> %
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) {
-; X32-LABEL: test_mm_cmpistrz:
-; X32: # %bb.0:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X32-NEXT: sete %al
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm_cmpistrz:
-; X64: # %bb.0:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X64-NEXT: sete %al
-; X64-NEXT: retq
+; ALL-LABEL: test_mm_cmpistrz:
+; ALL: # %bb.0:
+; ALL-NEXT: xorl %eax, %eax
+; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
+; ALL-NEXT: sete %al
+; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
Modified: llvm/trunk/utils/UpdateTestChecks/asm.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/UpdateTestChecks/asm.py?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/utils/UpdateTestChecks/asm.py (original)
+++ llvm/trunk/utils/UpdateTestChecks/asm.py Fri Jun 1 06:37:01 2018
@@ -107,7 +107,7 @@ def scrub_asm_x86(asm, args):
asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
# Generically match a LCP symbol.
asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
- if getattr(args, 'x86_extra_scrub', False):
+ if getattr(args, 'extra_scrub', False):
# Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'.
asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)
# Strip kill operands inserted into the asm.
Modified: llvm/trunk/utils/UpdateTestChecks/common.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/UpdateTestChecks/common.py?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/utils/UpdateTestChecks/common.py (original)
+++ llvm/trunk/utils/UpdateTestChecks/common.py Fri Jun 1 06:37:01 2018
@@ -3,6 +3,7 @@ import re
import string
import subprocess
import sys
+import copy
if sys.version_info[0] > 2:
class string:
@@ -80,13 +81,29 @@ def scrub_body(body):
body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
return body
+def do_scrub(body, scrubber, scrubber_args, extra):
+ if scrubber_args:
+ local_args = copy.deepcopy(scrubber_args)
+ local_args[0].extra_scrub = extra
+ return scrubber(body, *local_args)
+ return scrubber(body, *scrubber_args)
+
# Build up a dictionary of all the function bodies.
+class function_body(object):
+ def __init__(self, string, extra):
+ self.scrub = string
+ self.extrascrub = extra
+ def __str__(self):
+ return self.scrub
+
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
for m in function_re.finditer(raw_tool_output):
if not m:
continue
func = m.group('func')
- scrubbed_body = scrubber(m.group('body'), *scrubber_args)
+ body = m.group('body')
+ scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
+ scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
if m.groupdict().has_key('analysis'):
analysis = m.group('analysis')
if analysis.lower() != 'cost model analysis':
@@ -99,15 +116,19 @@ def build_function_body_dictionary(funct
for l in scrubbed_body.splitlines():
print(' ' + l, file=sys.stderr)
for prefix in prefixes:
- if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
- if prefix == prefixes[-1]:
- print('WARNING: Found conflicting asm under the '
- 'same prefix: %r!' % (prefix,), file=sys.stderr)
- else:
- func_dict[prefix][func] = None
+ if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body:
+ if func_dict[prefix][func] and func_dict[prefix][func].extrascrub == scrubbed_extra:
+ func_dict[prefix][func].scrub = scrubbed_extra
continue
+ else:
+ if prefix == prefixes[-1]:
+ print('WARNING: Found conflicting asm under the '
+ 'same prefix: %r!' % (prefix,), file=sys.stderr)
+ else:
+ func_dict[prefix][func] = None
+ continue
- func_dict[prefix][func] = scrubbed_body
+ func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra)
##### Generator of LLVM IR CHECK lines
@@ -188,7 +209,7 @@ def add_checks(output_lines, comment_mar
printed_prefixes.append(checkprefix)
output_lines.append(check_label_format % (checkprefix, func_name))
- func_body = func_dict[checkprefix][func_name].splitlines()
+ func_body = str(func_dict[checkprefix][func_name]).splitlines()
# For ASM output, just emit the check lines.
if is_asm == True:
Modified: llvm/trunk/utils/update_llc_test_checks.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/update_llc_test_checks.py?rev=333749&r1=333748&r2=333749&view=diff
==============================================================================
--- llvm/trunk/utils/update_llc_test_checks.py (original)
+++ llvm/trunk/utils/update_llc_test_checks.py Fri Jun 1 06:37:01 2018
@@ -28,8 +28,8 @@ def main():
parser.add_argument(
'--function', help='The function in the test file to update')
parser.add_argument(
- '--x86_extra_scrub', action='store_true',
- help='Use more regex for x86 matching to reduce diffs between various subtargets')
+ '--extra_scrub', action='store_true',
+ help='Always use additional regex to further reduce diffs between various subtargets')
parser.add_argument('tests', nargs='+')
args = parser.parse_args()