[llvm] 45c3fe8 - [X86] Add test coverage for the concatable sources vpermv3 -> vpermv fold for non-constant shuffle masks (#133415)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 04:58:25 PDT 2025


Author: Simon Pilgrim
Date: 2025-03-28T11:58:21Z
New Revision: 45c3fe8ff339843cdf0cfc8ccae91c0a4b7c09cd

URL: https://github.com/llvm/llvm-project/commit/45c3fe8ff339843cdf0cfc8ccae91c0a4b7c09cd
DIFF: https://github.com/llvm/llvm-project/commit/45c3fe8ff339843cdf0cfc8ccae91c0a4b7c09cd.diff

LOG: [X86] Add test coverage for the concatable sources vpermv3 -> vpermv fold for non-constant shuffle masks (#133415)

Test both forward/reverse concat cases

Added: 
    llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vl.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vl.ll
new file mode 100644
index 0000000000000..1f4228b1fdec9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vl.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64
+
+declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>) #1
+
+define <4 x double> @concat_vpermv3_ops_vpermv_v4f64(ptr %p0, <4 x i64> %m) { ; forward case: both data operands are adjacent loads passed in memory order
+; X86-LABEL: concat_vpermv3_ops_vpermv_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpermpd (%eax), %zmm0, %zmm0
+; X86-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: concat_vpermv3_ops_vpermv_v4f64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; X64-NEXT:    vpermpd (%rdi), %zmm0, %zmm0
+; X64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NEXT:    retq
+  %p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32 ; byte offset 32 = size of one <4 x double>, so %p1 points just past the vector at %p0
+  %lo = load <4 x double>, ptr %p0, align 32 ; first data operand: the 32 bytes at %p0
+  %hi = load <4 x double>, ptr %p1, align 32 ; second data operand: the following 32 bytes at %p0+32
+  %res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi) ; two-source permute whose mask %m is a function argument, i.e. not a constant; the expected output above is a single 512-bit vpermpd reading from %p0
+  ret <4 x double> %res
+}
+
+define <4 x double> @concat_vpermv3_ops_vpermv_swap_v4f64(ptr %p0, <4 x i64> %m) { ; reverse case: same two adjacent loads, but passed to the permute in the opposite order
+; X86-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovapd 32(%eax), %ymm1
+; X86-NEXT:    vpermi2pd (%eax), %ymm1, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: concat_vpermv3_ops_vpermv_swap_v4f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovapd 32(%rdi), %ymm1
+; X64-NEXT:    vpermi2pd (%rdi), %ymm1, %ymm0
+; X64-NEXT:    retq
+  %p1 = getelementptr inbounds nuw i8, ptr %p0, i64 32 ; byte offset 32 = size of one <4 x double>, so %p1 points just past the vector at %p0
+  %lo = load <4 x double>, ptr %p1, align 32 ; first data operand comes from the higher address (%p0+32)
+  %hi = load <4 x double>, ptr %p0, align 32 ; second data operand comes from the lower address (%p0)
+  %res = tail call noundef <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %lo, <4 x i64> %m, <4 x double> %hi) ; same variable-mask permute as the forward test; the expected output above still contains a separate load plus vpermi2pd for this operand order
+  ret <4 x double> %res
+}


        


More information about the llvm-commits mailing list