[llvm] e9768a2 - [x86] add test for possible load scalarization fold; NFC

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 28 05:49:44 PST 2022


Author: Sanjay Patel
Date: 2022-01-28T08:49:37-05:00
New Revision: e9768a2a44a1501b82e3bbf9862b4ba2cc4b9cc3

URL: https://github.com/llvm/llvm-project/commit/e9768a2a44a1501b82e3bbf9862b4ba2cc4b9cc3
DIFF: https://github.com/llvm/llvm-project/commit/e9768a2a44a1501b82e3bbf9862b4ba2cc4b9cc3.diff

LOG: [x86] add test for possible load scalarization fold; NFC

This is a minimal test to show a transform proposed in D118376.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/extractelement-load.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 5c3933d96cbb..2a7ed3a8b4e7 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -300,3 +300,38 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
   store i16 %10, i16* %2, align 2
   ret void
 }
+
+define i32 @multi_use_load_scalarization(<4 x i32>* %p) {
+; X32-SSE2-LABEL: multi_use_load_scalarization:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT:    movdqu (%ecx), %xmm0
+; X32-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-SSE2-NEXT:    movd %xmm0, %eax
+; X32-SSE2-NEXT:    psubd %xmm1, %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm0, (%ecx)
+; X32-SSE2-NEXT:    retl
+;
+; X64-SSSE3-LABEL: multi_use_load_scalarization:
+; X64-SSSE3:       # %bb.0:
+; X64-SSSE3-NEXT:    movdqu (%rdi), %xmm0
+; X64-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-SSSE3-NEXT:    movd %xmm0, %eax
+; X64-SSSE3-NEXT:    psubd %xmm1, %xmm0
+; X64-SSSE3-NEXT:    movdqa %xmm0, (%rdi)
+; X64-SSSE3-NEXT:    retq
+;
+; X64-AVX-LABEL: multi_use_load_scalarization:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; X64-AVX-NEXT:    vmovdqa %xmm1, (%rdi)
+; X64-AVX-NEXT:    vmovd %xmm0, %eax
+; X64-AVX-NEXT:    retq
+  %v = load <4 x i32>, <4 x i32>* %p, align 1 ; align 1 => unaligned vector load; %v has two users below
+  %v1 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1> ; first use of %v: add 1 to every lane
+  store <4 x i32> %v1, <4 x i32>* %p ; write the incremented vector back through %p (no explicit align given)
+  %r = extractelement <4 x i32> %v, i64 0 ; second use of %v: lane 0 of the loaded (pre-add) value
+  ret i32 %r ; result comes from the original load, not from %v1
+}


        


More information about the llvm-commits mailing list