[llvm] e9768a2 - [x86] add test for possible load scalarization fold; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 28 05:49:44 PST 2022
Author: Sanjay Patel
Date: 2022-01-28T08:49:37-05:00
New Revision: e9768a2a44a1501b82e3bbf9862b4ba2cc4b9cc3
URL: https://github.com/llvm/llvm-project/commit/e9768a2a44a1501b82e3bbf9862b4ba2cc4b9cc3
DIFF: https://github.com/llvm/llvm-project/commit/e9768a2a44a1501b82e3bbf9862b4ba2cc4b9cc3.diff
LOG: [x86] add test for possible load scalarization fold; NFC
This is a minimal test to show a transform proposed in D118376.
Added:
Modified:
llvm/test/CodeGen/X86/extractelement-load.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 5c3933d96cbb..2a7ed3a8b4e7 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -300,3 +300,38 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
store i16 %10, i16* %2, align 2
ret void
}
+
+define i32 @multi_use_load_scalarization(<4 x i32>* %p) { ; the load from %p has two users: a vector add (stored back to %p) and a lane-0 extract (returned)
+; X32-SSE2-LABEL: multi_use_load_scalarization:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT: movdqu (%ecx), %xmm0
+; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; X32-SSE2-NEXT: movd %xmm0, %eax
+; X32-SSE2-NEXT: psubd %xmm1, %xmm0
+; X32-SSE2-NEXT: movdqa %xmm0, (%ecx)
+; X32-SSE2-NEXT: retl
+;
+; X64-SSSE3-LABEL: multi_use_load_scalarization:
+; X64-SSSE3: # %bb.0:
+; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0
+; X64-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSSE3-NEXT: movd %xmm0, %eax
+; X64-SSSE3-NEXT: psubd %xmm1, %xmm0
+; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi)
+; X64-SSSE3-NEXT: retq
+;
+; X64-AVX-LABEL: multi_use_load_scalarization:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1
+; X64-AVX-NEXT: vmovdqa %xmm1, (%rdi)
+; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: retq
+ %v = load <4 x i32>, <4 x i32>* %p, align 1 ; align 1: the vector load is only byte-aligned (checks above expect an unaligned movdqu/vmovdqu)
+ %v1 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1> ; add 1 to each of the four lanes
+ store <4 x i32> %v1, <4 x i32>* %p ; no explicit align on the store, unlike the load (checks above expect an aligned movdqa/vmovdqa)
+ %r = extractelement <4 x i32> %v, i64 0 ; lane 0 of the original load %v, not of the sum %v1
+ ret i32 %r
+}
More information about the llvm-commits
mailing list