[llvm] e3eee9e - [X86] vector-trunc.ll - replace stores to ptr undef with real ptr values
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 24 07:09:36 PDT 2025
Author: Simon Pilgrim
Date: 2025-04-24T15:09:17+01:00
New Revision: e3eee9e81ec2a42d1f78d8a4e52f4b4ee8927436
URL: https://github.com/llvm/llvm-project/commit/e3eee9e81ec2a42d1f78d8a4e52f4b4ee8927436
DIFF: https://github.com/llvm/llvm-project/commit/e3eee9e81ec2a42d1f78d8a4e52f4b4ee8927436.diff
LOG: [X86] vector-trunc.ll - replace stores to ptr undef with real ptr values
Prevents some unnecessary codegen changes in an upcoming patch
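The change follows the same pattern in every affected test: the function gains a trailing ptr argument and the store targets that argument instead of undef, so the generated store uses the incoming pointer register instead of an uninitialized one. A minimal before/after sketch, taking trunc8i64_8i8 from the diff below as the representative case:

    ; before: the pointer operand is undef, so isel lowers the address to an
    ; arbitrary uninitialized register, which shows up as (%rax) in the checks
    define void @trunc8i64_8i8(<8 x i64> %a) {
    entry:
      %0 = trunc <8 x i64> %a to <8 x i8>
      store <8 x i8> %0, ptr undef, align 4
      ret void
    }

    ; after: %b is the first pointer argument; the vector is passed in vector
    ; registers, so %b lands in %rdi under the x86-64 SysV calling convention
    ; and the store becomes (%rdi)
    define void @trunc8i64_8i8(<8 x i64> %a, ptr %b) {
    entry:
      %0 = trunc <8 x i64> %a to <8 x i8>
      store <8 x i8> %0, ptr %b, align 4
      ret void
    }

Tests whose result is written with two 16-byte stores (e.g. trunc16i32_16i16 on SSE2) additionally gain an explicit 16(%rdi) offset for the upper half, as seen in the hunks below.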
Added:
Modified:
llvm/test/CodeGen/X86/vector-trunc.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 732cc445ddcd8..2e2bf33192b42 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -215,7 +215,7 @@ entry:
ret <8 x i16> %0
}
-define void @trunc8i64_8i8(<8 x i64> %a) {
+define void @trunc8i64_8i8(<8 x i64> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc8i64_8i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
@@ -227,7 +227,7 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movq %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i8:
@@ -241,7 +241,7 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
-; SSE41-NEXT: movq %xmm0, (%rax)
+; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i8:
@@ -255,7 +255,7 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vmovq %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -269,18 +269,18 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vmovq %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i8:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpmovqb %zmm0, (%rax)
+; AVX512-NEXT: vpmovqb %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%0 = trunc <8 x i64> %a to <8 x i8>
- store <8 x i8> %0, ptr undef, align 4
+ store <8 x i8> %0, ptr %b, align 4
ret void
}
@@ -492,7 +492,7 @@ entry:
ret <8 x i16> %1
}
-define void @trunc8i32_8i8(<8 x i32> %a) {
+define void @trunc8i32_8i8(<8 x i32> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc8i32_8i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
@@ -500,7 +500,7 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
-; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movq %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i8:
@@ -510,7 +510,7 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
-; SSE41-NEXT: movq %xmm0, (%rax)
+; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i8:
@@ -520,7 +520,7 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vmovq %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -531,7 +531,7 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vmovq %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -539,13 +539,13 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vmovq %xmm0, (%rax)
+; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i32_8i8:
; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpmovdb %ymm0, (%rax)
+; AVX512VL-NEXT: vpmovdb %ymm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
@@ -553,22 +553,22 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8:
; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
%0 = trunc <8 x i32> %a to <8 x i8>
- store <8 x i8> %0, ptr undef, align 4
+ store <8 x i8> %0, ptr %b, align 4
ret void
}
-define void @trunc16i32_16i16(<16 x i32> %a) {
+define void @trunc16i32_16i16(<16 x i32> %a, ptr %b) {
; SSE2-LABEL: trunc16i32_16i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
@@ -581,8 +581,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
; SSE2-NEXT: pslld $16, %xmm2
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: movdqu %xmm2, (%rax)
-; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE2-NEXT: movdqu %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc16i32_16i16:
@@ -594,8 +594,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
; SSSE3-NEXT: pshufb %xmm4, %xmm3
; SSSE3-NEXT: pshufb %xmm4, %xmm2
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSSE3-NEXT: movdqu %xmm2, (%rax)
-; SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSSE3-NEXT: movdqu %xmm2, 16(%rdi)
+; SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i16:
@@ -607,8 +607,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
; SSE41-NEXT: packusdw %xmm3, %xmm2
-; SSE41-NEXT: movdqu %xmm2, (%rax)
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i16:
@@ -620,8 +620,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqu %xmm1, (%rax)
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm1, 16(%rdi)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -632,22 +632,22 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i16:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpmovdw %zmm0, (%rax)
+; AVX512-NEXT: vpmovdw %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%0 = trunc <16 x i32> %a to <16 x i16>
- store <16 x i16> %0, ptr undef, align 4
+ store <16 x i16> %0, ptr %b, align 4
ret void
}
-define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
+define void @trunc16i32_16i16_ashr(<16 x i32> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc16i32_16i16_ashr:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: psrad $16, %xmm1
@@ -656,8 +656,8 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
; SSE2-SSSE3-NEXT: psrad $16, %xmm3
; SSE2-SSSE3-NEXT: psrad $16, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax)
-; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i16_ashr:
@@ -668,8 +668,8 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
-; SSE41-NEXT: movdqu %xmm2, (%rax)
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i16_ashr:
@@ -682,8 +682,8 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm1, (%rax)
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm1, 16(%rdi)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -693,24 +693,24 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i16_ashr:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpsrld $16, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdw %zmm0, (%rax)
+; AVX512-NEXT: vpmovdw %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%0 = ashr <16 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%1 = trunc <16 x i32> %0 to <16 x i16>
- store <16 x i16> %1, ptr undef, align 4
+ store <16 x i16> %1, ptr %b, align 4
ret void
}
-define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
+define void @trunc16i32_16i16_lshr(<16 x i32> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc16i32_16i16_lshr:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: psrad $16, %xmm1
@@ -719,8 +719,8 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
; SSE2-SSSE3-NEXT: psrad $16, %xmm3
; SSE2-SSSE3-NEXT: psrad $16, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax)
-; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i16_lshr:
@@ -731,8 +731,8 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
-; SSE41-NEXT: movdqu %xmm2, (%rax)
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i16_lshr:
@@ -745,8 +745,8 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm1, (%rax)
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm1, 16(%rdi)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -756,24 +756,24 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i16_lshr:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpsrld $16, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdw %zmm0, (%rax)
+; AVX512-NEXT: vpmovdw %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%0 = lshr <16 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%1 = trunc <16 x i32> %0 to <16 x i16>
- store <16 x i16> %1, ptr undef, align 4
+ store <16 x i16> %1, ptr %b, align 4
ret void
}
-define void @trunc16i32_16i8(<16 x i32> %a) {
+define void @trunc16i32_16i8(<16 x i32> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
@@ -784,7 +784,7 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i8:
@@ -797,7 +797,7 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm2, %xmm0
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i8:
@@ -810,7 +810,7 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -823,22 +823,22 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i8:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%0 = trunc <16 x i32> %a to <16 x i8>
- store <16 x i8> %0, ptr undef, align 4
+ store <16 x i8> %0, ptr %b, align 4
ret void
}
-define void @trunc16i32_16i8_ashr(<16 x i32> %a) {
+define void @trunc16i32_16i8_ashr(<16 x i32> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8_ashr:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: psrld $24, %xmm1
@@ -848,7 +848,7 @@ define void @trunc16i32_16i8_ashr(<16 x i32> %a) {
; SSE2-SSSE3-NEXT: psrld $24, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i8_ashr:
@@ -860,7 +860,7 @@ define void @trunc16i32_16i8_ashr(<16 x i32> %a) {
; SSE41-NEXT: psrld $24, %xmm2
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packuswb %xmm2, %xmm0
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i8_ashr:
@@ -874,7 +874,7 @@ define void @trunc16i32_16i8_ashr(<16 x i32> %a) {
; AVX1-NEXT: vpsrld $24, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -886,24 +886,24 @@ define void @trunc16i32_16i8_ashr(<16 x i32> %a) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i8_ashr:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpsrld $24, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%0 = ashr <16 x i32> %a, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
%1 = trunc <16 x i32> %0 to <16 x i8>
- store <16 x i8> %1, ptr undef, align 4
+ store <16 x i8> %1, ptr %b, align 4
ret void
}
-define void @trunc16i32_16i8_lshr(<16 x i32> %a) {
+define void @trunc16i32_16i8_lshr(<16 x i32> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8_lshr:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: psrld $24, %xmm1
@@ -913,7 +913,7 @@ define void @trunc16i32_16i8_lshr(<16 x i32> %a) {
; SSE2-SSSE3-NEXT: psrld $24, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i8_lshr:
@@ -925,7 +925,7 @@ define void @trunc16i32_16i8_lshr(<16 x i32> %a) {
; SSE41-NEXT: psrld $24, %xmm2
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packuswb %xmm2, %xmm0
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i8_lshr:
@@ -939,7 +939,7 @@ define void @trunc16i32_16i8_lshr(<16 x i32> %a) {
; AVX1-NEXT: vpsrld $24, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -951,32 +951,32 @@ define void @trunc16i32_16i8_lshr(<16 x i32> %a) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i8_lshr:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpsrld $24, %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
%0 = lshr <16 x i32> %a, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
%1 = trunc <16 x i32> %0 to <16 x i8>
- store <16 x i8> %1, ptr undef, align 4
+ store <16 x i8> %1, ptr %b, align 4
ret void
}
;PR25684
-define void @trunc16i16_16i8(<16 x i16> %a) {
+define void @trunc16i16_16i8(<16 x i16> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc16i16_16i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i16_16i8:
@@ -985,7 +985,7 @@ define void @trunc16i16_16i8(<16 x i16> %a) {
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8:
@@ -993,7 +993,7 @@ define void @trunc16i16_16i8(<16 x i16> %a) {
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -1002,21 +1002,21 @@ define void @trunc16i16_16i8(<16 x i16> %a) {
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc16i16_16i8:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc16i16_16i8:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
@@ -1024,28 +1024,28 @@ define void @trunc16i16_16i8(<16 x i16> %a) {
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8:
; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
%0 = trunc <16 x i16> %a to <16 x i8>
- store <16 x i8> %0, ptr undef, align 4
+ store <16 x i8> %0, ptr %b, align 4
ret void
}
-define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
+define void @trunc16i16_16i8_ashr(<16 x i16> %a, ptr %b) {
; SSE-LABEL: trunc16i16_16i8_ashr:
; SSE: # %bb.0: # %entry
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
-; SSE-NEXT: movdqu %xmm0, (%rax)
+; SSE-NEXT: movdqu %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8_ashr:
@@ -1054,7 +1054,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -1063,7 +1063,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -1071,7 +1071,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -1079,7 +1079,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
@@ -1087,30 +1087,30 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_ashr:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
%0 = ashr <16 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%1 = trunc <16 x i16> %0 to <16 x i8>
- store <16 x i8> %1, ptr undef, align 4
+ store <16 x i8> %1, ptr %b, align 4
ret void
}
-define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
+define void @trunc16i16_16i8_lshr(<16 x i16> %a, ptr %b) {
; SSE-LABEL: trunc16i16_16i8_lshr:
; SSE: # %bb.0: # %entry
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
-; SSE-NEXT: movdqu %xmm0, (%rax)
+; SSE-NEXT: movdqu %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8_lshr:
@@ -1119,7 +1119,7 @@ define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -1128,7 +1128,7 @@ define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -1136,7 +1136,7 @@ define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -1144,7 +1144,7 @@ define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
@@ -1152,24 +1152,24 @@ define void @trunc16i16_16i8_lshr(<16 x i16> %a) {
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_lshr:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
%0 = lshr <16 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%1 = trunc <16 x i16> %0 to <16 x i8>
- store <16 x i8> %1, ptr undef, align 4
+ store <16 x i8> %1, ptr %b, align 4
ret void
}
-define void @trunc32i16_32i8(<32 x i16> %a) {
+define void @trunc32i16_32i8(<32 x i16> %a, ptr %b) {
; SSE2-SSSE3-LABEL: trunc32i16_32i8:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
@@ -1179,8 +1179,8 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax)
-; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc32i16_32i8:
@@ -1192,8 +1192,8 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm4, %xmm2
; SSE41-NEXT: packuswb %xmm3, %xmm2
-; SSE41-NEXT: movdqu %xmm2, (%rax)
-; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE41-NEXT: movdqu %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc32i16_32i8:
@@ -1205,8 +1205,8 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqu %xmm1, (%rax)
-; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vmovdqu %xmm1, 16(%rdi)
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -1217,7 +1217,7 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -1225,9 +1225,9 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT: vpmovdb %zmm1, (%rax)
+; AVX512F-NEXT: vpmovdb %zmm1, 16(%rdi)
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -1235,26 +1235,26 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512VL-NEXT: vpmovdb %zmm1, (%rax)
+; AVX512VL-NEXT: vpmovdb %zmm1, 16(%rdi)
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc32i16_32i8:
; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpmovwb %zmm0, (%rax)
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8:
; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax)
+; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
%0 = trunc <32 x i16> %a to <32 x i8>
- store <32 x i8> %0, ptr undef, align 4
+ store <32 x i8> %0, ptr %b, align 4
ret void
}