[llvm] 2d756d2 - [NFC][Codegen][X86] Add tests where we could improve `freeze` handling
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 22 13:03:48 PST 2022
Author: Roman Lebedev
Date: 2022-12-23T00:03:26+03:00
New Revision: 2d756d25e68826787277ecf532048dd6d55eb2d0
URL: https://github.com/llvm/llvm-project/commit/2d756d25e68826787277ecf532048dd6d55eb2d0
DIFF: https://github.com/llvm/llvm-project/commit/2d756d25e68826787277ecf532048dd6d55eb2d0.diff
LOG: [NFC][Codegen][X86] Add tests where we could improve `freeze` handling
Added:
Modified:
llvm/test/CodeGen/X86/freeze-vector.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index ff90ba39d962..c10e35c2cf0d 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64
define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_insert_subvector:
@@ -58,3 +58,205 @@ define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
%z = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %z
}
+
+define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_from_wider_elt:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_from_wider_elt:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vmovlps %xmm0, (%rsi)
+; X64-NEXT: retq
+ %i0 = load <4 x i16>, ptr %origin
+ %i1 = bitcast <4 x i16> %i0 to <8 x i8>
+ %i2 = freeze <8 x i8> %i1
+ %i3 = bitcast <8 x i8> %i2 to i64
+ store i64 %i3, ptr %dst
+ ret void
+}
+define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_from_wider_elt_escape:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovsd %xmm0, (%ecx)
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: movq %rax, (%rsi)
+; X64-NEXT: vmovq %xmm0, (%rdx)
+; X64-NEXT: retq
+ %i0 = load <4 x i16>, ptr %origin
+ %i1 = bitcast <4 x i16> %i0 to <8 x i8>
+ store <8 x i8> %i1, ptr %escape
+ %i2 = freeze <8 x i8> %i1
+ %i3 = bitcast <8 x i8> %i2 to i64
+ store i64 %i3, ptr %dst
+ ret void
+}
+
+define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_to_wider_elt:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_to_wider_elt:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vmovlps %xmm0, (%rsi)
+; X64-NEXT: retq
+ %i0 = load <8 x i8>, ptr %origin
+ %i1 = bitcast <8 x i8> %i0 to <4 x i16>
+ %i2 = freeze <4 x i16> %i1
+ %i3 = bitcast <4 x i16> %i2 to i64
+ store i64 %i3, ptr %dst
+ ret void
+}
+define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_to_wider_elt_escape:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovsd %xmm0, (%ecx)
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: movq %rax, (%rsi)
+; X64-NEXT: vmovq %xmm0, (%rdx)
+; X64-NEXT: retq
+ %i0 = load <8 x i8>, ptr %origin
+ %i1 = bitcast <8 x i8> %i0 to <4 x i16>
+ store <4 x i16> %i1, ptr %escape
+ %i2 = freeze <4 x i16> %i1
+ %i3 = bitcast <4 x i16> %i2 to i64
+ store i64 %i3, ptr %dst
+ ret void
+}
+
+define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwind {
+; X86-LABEL: freeze_extractelement:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: vmovdqa (%edx), %xmm0
+; X86-NEXT: vpand (%ecx), %xmm0, %xmm0
+; X86-NEXT: vpextrb $6, %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_extractelement:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa (%rdi), %xmm0
+; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
+; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
+; X64-NEXT: retq
+ %i0 = load <16 x i8>, ptr %origin0
+ %i1 = load <16 x i8>, ptr %origin1
+ %i2 = and <16 x i8> %i0, %i1
+ %i3 = freeze <16 x i8> %i2
+ %i4 = extractelement <16 x i8> %i3, i64 6
+ store i8 %i4, ptr %dst
+ ret void
+}
+define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ptr %escape) nounwind {
+; X86-LABEL: freeze_extractelement_escape:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: vmovdqa (%esi), %xmm0
+; X86-NEXT: vpand (%edx), %xmm0, %xmm0
+; X86-NEXT: vmovdqa %xmm0, (%ecx)
+; X86-NEXT: vpextrb $6, %xmm0, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_extractelement_escape:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa (%rdi), %xmm0
+; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
+; X64-NEXT: vmovdqa %xmm0, (%rcx)
+; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
+; X64-NEXT: retq
+ %i0 = load <16 x i8>, ptr %origin0
+ %i1 = load <16 x i8>, ptr %origin1
+ %i2 = and <16 x i8> %i0, %i1
+ %i3 = freeze <16 x i8> %i2
+ store <16 x i8> %i3, ptr %escape
+ %i4 = extractelement <16 x i8> %i3, i64 6
+ store i8 %i4, ptr %dst
+ ret void
+}
+define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
+; X86-LABEL: freeze_extractelement_extra_use:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: andl $15, %eax
+; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: andl $15, %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 12(%ebp), %esi
+; X86-NEXT: movl 8(%ebp), %edi
+; X86-NEXT: vmovaps (%edi), %xmm0
+; X86-NEXT: vandps (%esi), %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%esp)
+; X86-NEXT: movzbl (%esp,%ecx), %ecx
+; X86-NEXT: cmpb (%esp,%eax), %cl
+; X86-NEXT: sete (%edx)
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_extractelement_extra_use:
+; X64: # %bb.0:
+; X64-NEXT: andl $15, %ecx
+; X64-NEXT: andl $15, %edx
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vandps (%rsi), %xmm0, %xmm0
+; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movzbl -24(%rsp,%rdx), %eax
+; X64-NEXT: cmpb -24(%rsp,%rcx), %al
+; X64-NEXT: sete (%r8)
+; X64-NEXT: retq
+ %i0 = load <16 x i8>, ptr %origin0
+ %i1 = load <16 x i8>, ptr %origin1
+ %i2 = and <16 x i8> %i0, %i1
+ %i3 = freeze <16 x i8> %i2
+ %i4 = extractelement <16 x i8> %i3, i64 %idx0
+ %i5 = extractelement <16 x i8> %i3, i64 %idx1
+ %i6 = icmp eq i8 %i4, %i5
+ store i1 %i6, ptr %dst
+ ret void
+}
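
For context: the freeze_bitcast_* tests above exercise freeze-of-bitcast
patterns, and in the _escape variants the x86-64 output currently
round-trips the loaded value through a general-purpose register
(movq + vmovq) before storing it. One plausible shape of the missed fold
is hoisting the freeze above the bitcast so that the two bitcasts cancel.
A minimal LLVM IR sketch of that idea; the function and value names are
illustrative only, not taken from the commit:

define i64 @sketch_freeze_of_bitcast(ptr %origin) {
  ; Current form: freeze sits between the two bitcasts, so the
  ; combiner cannot merge them when the first bitcast is multi-use.
  %v = load <4 x i16>, ptr %origin
  %b = bitcast <4 x i16> %v to <8 x i8>
  %f = freeze <8 x i8> %b
  %r = bitcast <8 x i8> %f to i64
  ret i64 %r
}

define i64 @sketch_freeze_hoisted(ptr %origin) {
  ; Hoped-for form: freeze(bitcast x) is rewritten to
  ; bitcast(freeze x), after which bitcast-of-bitcast folds away
  ; and the frozen load can be stored directly.
  %v = load <4 x i16>, ptr %origin
  %f = freeze <4 x i16> %v
  %r = bitcast <4 x i16> %f to i64
  ret i64 %r
}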
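
The freeze_extractelement* tests freeze an entire <16 x i8> 'and' result
and then extract one lane. Pushing the freeze onto the extracted scalar
instead would leave a single-use vector 'and' that the backend could
narrow to a byte-sized operation. A sketch of that rewrite, again with
illustrative names only:

define i8 @sketch_freeze_of_extract(ptr %p0, ptr %p1) {
  ; Current form: the whole vector is frozen, keeping the vector
  ; 'and' alive even though only lane 6 is used.
  %v0 = load <16 x i8>, ptr %p0
  %v1 = load <16 x i8>, ptr %p1
  %a = and <16 x i8> %v0, %v1
  %f = freeze <16 x i8> %a
  %e = extractelement <16 x i8> %f, i64 6
  ret i8 %e
}

define i8 @sketch_freeze_scalarized(ptr %p0, ptr %p1) {
  ; Hoped-for form: only the used lane is frozen, so the whole
  ; computation may be narrowed to a single byte.
  %v0 = load <16 x i8>, ptr %p0
  %v1 = load <16 x i8>, ptr %p1
  %a = and <16 x i8> %v0, %v1
  %e = extractelement <16 x i8> %a, i64 6
  %f = freeze i8 %e
  ret i8 %f
}

The _escape and _extra_use variants above store or re-read the frozen
vector, so the whole-vector freeze has to stay; they pin down cases where
this per-extract rewrite would be unsound or unprofitable.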