[llvm] 225768d - [X86] combineConcatVectorOps - add tests showing v4i64 shift-by-32 with unnecessary concatenation
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 06:52:31 PDT 2025
Author: Simon Pilgrim
Date: 2025-06-20T14:52:12+01:00
New Revision: 225768d1f9f2e2ccff7dc79b4a4aaeab4c6aafc1
URL: https://github.com/llvm/llvm-project/commit/225768d1f9f2e2ccff7dc79b4a4aaeab4c6aafc1
DIFF: https://github.com/llvm/llvm-project/commit/225768d1f9f2e2ccff7dc79b4a4aaeab4c6aafc1.diff
LOG: [X86] combineConcatVectorOps - add tests showing v4i64 shift-by-32 with unnecessary concatenation
On AVX1-only targets, we concatenate SHL/SRL v4i64 shift-by-32 as a shuffle, but this is only worthwhile if the shift source value is free to concatenate.
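For context, here is a minimal standalone C sketch (my own illustration, not code from the patch; the function name and test values are invented for this example) of the two-vshufps sequence the AVX1 check lines below use. AVX1 has no 256-bit integer shifts, but a logical right shift of each 64-bit lane by 32 just moves the lane's high 32-bit half down and zeros the top, which 32-bit shuffles against a zero vector can express:

#include <immintrin.h>  /* AVX intrinsics; compile with -mavx */
#include <stdint.h>
#include <stdio.h>

/* v4i64 >> 32: each 64-bit lane keeps only its high 32-bit half,
   so the shift is expressible as 32-bit shuffles against zero. */
static __m256i lshr32_v4i64_shuffle(__m256i v) {
    __m256 f = _mm256_castsi256_ps(v);
    __m256 z = _mm256_setzero_ps();
    /* Per 128-bit lane: {v[1], v[3], 0, 0} -- the odd 32-bit elements
       are the high halves of the two 64-bit lanes. */
    __m256 t = _mm256_shuffle_ps(f, z, _MM_SHUFFLE(3, 1, 3, 1));
    /* Per 128-bit lane: {v[1], 0, v[3], 0} -- each 64-bit lane becomes
       {high half, 0}, which is exactly lshr by 32. */
    t = _mm256_shuffle_ps(t, t, _MM_SHUFFLE(3, 1, 2, 0));
    return _mm256_castps_si256(t);
}

int main(void) {
    uint64_t in[4] = {0x1111111122222222ULL, 0x3333333344444444ULL,
                      0x5555555566666666ULL, 0x7777777788888888ULL};
    uint64_t out[4];
    __m256i v = _mm256_loadu_si256((const __m256i *)in);
    _mm256_storeu_si256((__m256i *)out, lshr32_v4i64_shuffle(v));
    for (int i = 0; i < 4; i++)
        printf("%016llx (expect %016llx)\n",
               (unsigned long long)out[i],
               (unsigned long long)(in[i] >> 32));
    return 0;
}

The new tests exercise the case where the v4i64 input is itself built by concatenating two v2i64 halves: paying for the vinsertf128 just to feed this ymm shuffle may not beat shifting the two 128-bit halves separately, hence the "unnecessary concatenation" in the title.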
Added:
Modified:
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index d8852956c66f3..b45525b6e20f9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -1971,6 +1971,73 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
ret <4 x i64> %shift
}
+define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind {
+; AVX1-LABEL: shift32_v4i64_concat:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shift32_v4i64_concat:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: shift32_v4i64_concat:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift32_v4i64_concat:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift32_v4i64_concat:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift32_v4i64_concat:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift32_v4i64_concat:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift32_v4i64_concat:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %a = shufflevector <2 x i64> %lo, <2 x i64> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shift = lshr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+ ret <4 x i64> %shift
+}
+
define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
; AVX1-LABEL: sh_trunc_sh_vec:
; AVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 3f238b5739f06..2248ee997d525 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -1823,3 +1823,70 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
%shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
ret <4 x i64> %shift
}
+
+define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind {
+; AVX1-LABEL: shift32_v4i64_concat:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shift32_v4i64_concat:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: shift32_v4i64_concat:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
+; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift32_v4i64_concat:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift32_v4i64_concat:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift32_v4i64_concat:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift32_v4i64_concat:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
+; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift32_v4i64_concat:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %a = shufflevector <2 x i64> %lo, <2 x i64> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+ ret <4 x i64> %shift
+}