[llvm] d5ebba2 - [x86] add test with volatile load; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed May 25 05:19:34 PDT 2022
Author: Sanjay Patel
Date: 2022-05-25T08:19:29-04:00
New Revision: d5ebba2aa68347eca8f3b920a61f4810d89c0f68
URL: https://github.com/llvm/llvm-project/commit/d5ebba2aa68347eca8f3b920a61f4810d89c0f68
DIFF: https://github.com/llvm/llvm-project/commit/d5ebba2aa68347eca8f3b920a61f4810d89c0f68.diff
LOG: [x86] add test with volatile load; NFC
Test for D126353
Added:
Modified:
llvm/test/CodeGen/X86/extractelement-load.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 78ad6d95963b5..5e1ff78a31c10 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -338,6 +338,41 @@ define i32 @multi_use_load_scalarization(<4 x i32>* %p) nounwind {
ret i32 %r
}
+define i32 @multi_use_volatile_load_scalarization(<4 x i32>* %p) nounwind {
+; X32-SSE2-LABEL: multi_use_volatile_load_scalarization:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT: movl (%ecx), %eax
+; X32-SSE2-NEXT: movdqu (%ecx), %xmm0
+; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; X32-SSE2-NEXT: psubd %xmm1, %xmm0
+; X32-SSE2-NEXT: movdqa %xmm0, (%ecx)
+; X32-SSE2-NEXT: retl
+;
+; X64-SSSE3-LABEL: multi_use_volatile_load_scalarization:
+; X64-SSSE3: # %bb.0:
+; X64-SSSE3-NEXT: movl (%rdi), %eax
+; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0
+; X64-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-SSSE3-NEXT: psubd %xmm1, %xmm0
+; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi)
+; X64-SSSE3-NEXT: retq
+;
+; X64-AVX-LABEL: multi_use_volatile_load_scalarization:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: movl (%rdi), %eax
+; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovdqa %xmm0, (%rdi)
+; X64-AVX-NEXT: retq
+ %v = load volatile <4 x i32>, <4 x i32>* %p, align 1
+ %v1 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %v1, <4 x i32>* %p
+ %r = extractelement <4 x i32> %v, i64 0
+ ret i32 %r
+}
+
; This test is reduced from a C source example that showed a miscompile:
; https://github.com/llvm/llvm-project/issues/53695
; The scalarized loads from 'zero' in the AVX asm must occur before
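For reference: the CHECK lines above are in the autogenerated FileCheck style, so a short sketch of the usual workflow for refreshing and running this test is below. The build-tree paths and the llc location are assumptions; the authoritative RUN lines live at the top of extractelement-load.ll.

    # Regenerate the X32-SSE2/X64-SSSE3/X64-AVX CHECK lines (assumed llc path).
    python3 llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
        llvm/test/CodeGen/X86/extractelement-load.ll
    # Run just this test through lit (assumed build directory layout).
    build/bin/llvm-lit -v llvm/test/CodeGen/X86/extractelement-load.ll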