[llvm-branch-commits] [llvm] [X86] Cast atomic vectors in IR to support floats (PR #148899)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 30 22:12:53 PDT 2025
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/148899
>From ab47398f3abb2b2cbe6a98ab00c1c6e1e60a19e3 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Tue, 15 Jul 2025 13:02:04 -0400
Subject: [PATCH] [X86] Cast atomic vectors in IR to support floats
This commit casts floats to ints in an atomic load during AtomicExpand to support
floating-point types. It is also required to support 128-bit vectors in SSE/AVX.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +
llvm/lib/Target/X86/X86ISelLowering.h | 2 +
llvm/test/CodeGen/X86/atomic-load-store.ll | 508 ++++-----------------
3 files changed, 109 insertions(+), 408 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 49beadae63f03..e15f17281b958 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32136,6 +32136,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
}
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+ if (LI->getType()->getScalarType()->isFloatingPointTy())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b7151f65942b4..f9a8adbd7da0d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1841,6 +1841,8 @@ namespace llvm {
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index d9ee379b4bd2f..4e8f637f34113 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3,CHECK-SSE1-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3,CHECK-SSE2-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX2-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX512-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0,CHECK-SSE1-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0,CHECK-SSE2-O0
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX2-O0
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX512-O0
@@ -119,13 +119,13 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
@@ -133,8 +133,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
@@ -142,8 +141,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
ret <1 x bfloat> %ret
@@ -296,22 +294,9 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
}
define <2 x half> @atomic_vec2_half(ptr %x) {
-; CHECK-O3-LABEL: atomic_vec2_half:
-; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O3-NEXT: shrl $16, %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK-O3-NEXT: retq
-;
; CHECK-SSE-O3-LABEL: atomic_vec2_half:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O3-NEXT: shrl $16, %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-SSE-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec2_half:
@@ -319,153 +304,38 @@ define <2 x half> @atomic_vec2_half(ptr %x) {
; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O3-NEXT: retq
;
-; CHECK-AVX512-O3-LABEL: atomic_vec2_half:
-; CHECK-AVX512-O3: # %bb.0:
-; CHECK-AVX512-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-AVX512-O3-NEXT: retq
-;
-; CHECK-O0-LABEL: atomic_vec2_half:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movl (%rdi), %eax
-; CHECK-O0-NEXT: movl %eax, %ecx
-; CHECK-O0-NEXT: shrl $16, %ecx
-; CHECK-O0-NEXT: movw %cx, %dx
-; CHECK-O0-NEXT: # implicit-def: $ecx
-; CHECK-O0-NEXT: movw %dx, %cx
-; CHECK-O0-NEXT: # implicit-def: $xmm1
-; CHECK-O0-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK-O0-NEXT: movw %ax, %cx
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK-O0-NEXT: retq
-;
; CHECK-SSE-O0-LABEL: atomic_vec2_half:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O0-NEXT: movl %eax, %ecx
-; CHECK-SSE-O0-NEXT: shrl $16, %ecx
-; CHECK-SSE-O0-NEXT: movw %cx, %dx
-; CHECK-SSE-O0-NEXT: # implicit-def: $ecx
-; CHECK-SSE-O0-NEXT: movw %dx, %cx
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm1
-; CHECK-SSE-O0-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK-SSE-O0-NEXT: movw %ax, %cx
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec2_half:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O0-NEXT: retq
-;
-; CHECK-AVX512-O0-LABEL: atomic_vec2_half:
-; CHECK-AVX512-O0: # %bb.0:
-; CHECK-AVX512-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-AVX512-O0-NEXT: retq
%ret = load atomic <2 x half>, ptr %x acquire, align 4
ret <2 x half> %ret
}
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
-; CHECK-O3-LABEL: atomic_vec2_bfloat:
-; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O3-NEXT: shrl $16, %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK-O3-NEXT: retq
-;
; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O3-NEXT: shrl $16, %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-SSE-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O3-NEXT: shrl $16, %eax
-; CHECK-AVX-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O3-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O3-NEXT: retq
;
-; CHECK-AVX512-O3-LABEL: atomic_vec2_bfloat:
-; CHECK-AVX512-O3: # %bb.0:
-; CHECK-AVX512-O3-NEXT: movl (%rdi), %eax
-; CHECK-AVX512-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O3-NEXT: shrl $16, %eax
-; CHECK-AVX512-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O3-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-AVX512-O3-NEXT: retq
-;
-; CHECK-O0-LABEL: atomic_vec2_bfloat:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movl (%rdi), %eax
-; CHECK-O0-NEXT: movl %eax, %ecx
-; CHECK-O0-NEXT: shrl $16, %ecx
-; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx
-; CHECK-O0-NEXT: movw %ax, %dx
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %dx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm1
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK-O0-NEXT: retq
-;
; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O0-NEXT: movl %eax, %ecx
-; CHECK-SSE-O0-NEXT: shrl $16, %ecx
-; CHECK-SSE-O0-NEXT: # kill: def $cx killed $cx killed $ecx
-; CHECK-SSE-O0-NEXT: movw %ax, %dx
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %dx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm1
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-SSE-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movl (%rdi), %eax
-; CHECK-AVX-O0-NEXT: movw %ax, %cx
-; CHECK-AVX-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O0-NEXT: shrl $16, %eax
-; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-AVX-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O0-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O0-NEXT: retq
-;
-; CHECK-AVX512-O0-LABEL: atomic_vec2_bfloat:
-; CHECK-AVX512-O0: # %bb.0:
-; CHECK-AVX512-O0-NEXT: movl (%rdi), %eax
-; CHECK-AVX512-O0-NEXT: movw %ax, %cx
-; CHECK-AVX512-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O0-NEXT: shrl $16, %eax
-; CHECK-AVX512-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-AVX512-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O0-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-AVX512-O0-NEXT: retq
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
ret <2 x bfloat> %ret
}
@@ -501,13 +371,13 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec1_half:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_half:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_half:
@@ -515,8 +385,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_half:
@@ -524,8 +393,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x half>, ptr %x acquire, align 2
ret <1 x half> %ret
@@ -760,38 +628,9 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
}
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec4_half:
-; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movl %eax, %ecx
-; CHECK-O3-NEXT: shrl $16, %ecx
-; CHECK-O3-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O3-NEXT: movq %rax, %rcx
-; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK-O3-NEXT: shrq $48, %rax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm3
-; CHECK-O3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; CHECK-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK-O3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-O3-NEXT: retq
-;
; CHECK-SSE-O3-LABEL: atomic_vec4_half:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O3-NEXT: movl %eax, %ecx
-; CHECK-SSE-O3-NEXT: shrl $16, %ecx
-; CHECK-SSE-O3-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O3-NEXT: movq %rax, %rcx
-; CHECK-SSE-O3-NEXT: shrq $32, %rcx
-; CHECK-SSE-O3-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK-SSE-O3-NEXT: shrq $48, %rax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm3
-; CHECK-SSE-O3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; CHECK-SSE-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK-SSE-O3-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],zero,zero
+; CHECK-SSE-O3-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec4_half:
@@ -799,261 +638,38 @@ define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
; CHECK-AVX-O3-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O3-NEXT: retq
;
-; CHECK-AVX512-O3-LABEL: atomic_vec4_half:
-; CHECK-AVX512-O3: # %bb.0:
-; CHECK-AVX512-O3-NEXT: vmovq (%rdi), %xmm0
-; CHECK-AVX512-O3-NEXT: retq
-;
-; CHECK-O0-LABEL: atomic_vec4_half:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movl %eax, %ecx
-; CHECK-O0-NEXT: shrl $16, %ecx
-; CHECK-O0-NEXT: movw %cx, %dx
-; CHECK-O0-NEXT: # implicit-def: $ecx
-; CHECK-O0-NEXT: movw %dx, %cx
-; CHECK-O0-NEXT: # implicit-def: $xmm2
-; CHECK-O0-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK-O0-NEXT: movw %ax, %dx
-; CHECK-O0-NEXT: # implicit-def: $ecx
-; CHECK-O0-NEXT: movw %dx, %cx
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %ecx, %xmm0
-; CHECK-O0-NEXT: movq %rax, %rcx
-; CHECK-O0-NEXT: shrq $32, %rcx
-; CHECK-O0-NEXT: movw %cx, %dx
-; CHECK-O0-NEXT: # implicit-def: $ecx
-; CHECK-O0-NEXT: movw %dx, %cx
-; CHECK-O0-NEXT: # implicit-def: $xmm1
-; CHECK-O0-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK-O0-NEXT: shrq $48, %rax
-; CHECK-O0-NEXT: movw %ax, %cx
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm3
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm3
-; CHECK-O0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; CHECK-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; CHECK-O0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-O0-NEXT: retq
-;
; CHECK-SSE-O0-LABEL: atomic_vec4_half:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O0-NEXT: movl %eax, %ecx
-; CHECK-SSE-O0-NEXT: shrl $16, %ecx
-; CHECK-SSE-O0-NEXT: movw %cx, %dx
-; CHECK-SSE-O0-NEXT: # implicit-def: $ecx
-; CHECK-SSE-O0-NEXT: movw %dx, %cx
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm2
-; CHECK-SSE-O0-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK-SSE-O0-NEXT: movw %ax, %dx
-; CHECK-SSE-O0-NEXT: # implicit-def: $ecx
-; CHECK-SSE-O0-NEXT: movw %dx, %cx
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %ecx, %xmm0
-; CHECK-SSE-O0-NEXT: movq %rax, %rcx
-; CHECK-SSE-O0-NEXT: shrq $32, %rcx
-; CHECK-SSE-O0-NEXT: movw %cx, %dx
-; CHECK-SSE-O0-NEXT: # implicit-def: $ecx
-; CHECK-SSE-O0-NEXT: movw %dx, %cx
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm1
-; CHECK-SSE-O0-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK-SSE-O0-NEXT: shrq $48, %rax
-; CHECK-SSE-O0-NEXT: movw %ax, %cx
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm3
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm3
-; CHECK-SSE-O0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; CHECK-SSE-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; CHECK-SSE-O0-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; CHECK-SSE-O0-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec4_half:
; CHECK-AVX-O0: # %bb.0:
; CHECK-AVX-O0-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O0-NEXT: retq
-;
-; CHECK-AVX512-O0-LABEL: atomic_vec4_half:
-; CHECK-AVX512-O0: # %bb.0:
-; CHECK-AVX512-O0-NEXT: vmovq (%rdi), %xmm0
-; CHECK-AVX512-O0-NEXT: retq
%ret = load atomic <4 x half>, ptr %x acquire, align 8
ret <4 x half> %ret
}
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec4_bfloat:
-; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq %rax, %rcx
-; CHECK-O3-NEXT: movq %rax, %rdx
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
-; CHECK-O3-NEXT: shrl $16, %eax
-; CHECK-O3-NEXT: shrq $32, %rcx
-; CHECK-O3-NEXT: shrq $48, %rdx
-; CHECK-O3-NEXT: pinsrw $0, %edx, %xmm1
-; CHECK-O3-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm3
-; CHECK-O3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; CHECK-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; CHECK-O3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-O3-NEXT: retq
-;
; CHECK-SSE-O3-LABEL: atomic_vec4_bfloat:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O3-NEXT: movq %rax, %rcx
-; CHECK-SSE-O3-NEXT: movq %rax, %rdx
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O3-NEXT: # kill: def $eax killed $eax killed $rax
-; CHECK-SSE-O3-NEXT: shrl $16, %eax
-; CHECK-SSE-O3-NEXT: shrq $32, %rcx
-; CHECK-SSE-O3-NEXT: shrq $48, %rdx
-; CHECK-SSE-O3-NEXT: pinsrw $0, %edx, %xmm1
-; CHECK-SSE-O3-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm3
-; CHECK-SSE-O3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; CHECK-SSE-O3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; CHECK-SSE-O3-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],zero,zero
+; CHECK-SSE-O3-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec4_bfloat:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O3-NEXT: movq %rax, %rcx
-; CHECK-AVX-O3-NEXT: shrq $48, %rcx
-; CHECK-AVX-O3-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O3-NEXT: movq %rax, %rcx
-; CHECK-AVX-O3-NEXT: shrq $32, %rcx
-; CHECK-AVX-O3-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O3-NEXT: shrl $16, %eax
-; CHECK-AVX-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O3-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; CHECK-AVX-O3-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O3-NEXT: retq
;
-; CHECK-AVX512-O3-LABEL: atomic_vec4_bfloat:
-; CHECK-AVX512-O3: # %bb.0:
-; CHECK-AVX512-O3-NEXT: movq (%rdi), %rax
-; CHECK-AVX512-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O3-NEXT: movq %rax, %rcx
-; CHECK-AVX512-O3-NEXT: shrq $48, %rcx
-; CHECK-AVX512-O3-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O3-NEXT: movq %rax, %rcx
-; CHECK-AVX512-O3-NEXT: shrq $32, %rcx
-; CHECK-AVX512-O3-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O3-NEXT: shrl $16, %eax
-; CHECK-AVX512-O3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O3-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-AVX512-O3-NEXT: retq
-;
-; CHECK-O0-LABEL: atomic_vec4_bfloat:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movl %eax, %ecx
-; CHECK-O0-NEXT: shrl $16, %ecx
-; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx
-; CHECK-O0-NEXT: movw %ax, %dx
-; CHECK-O0-NEXT: movq %rax, %rsi
-; CHECK-O0-NEXT: shrq $32, %rsi
-; CHECK-O0-NEXT: # kill: def $si killed $si killed $rsi
-; CHECK-O0-NEXT: shrq $48, %rax
-; CHECK-O0-NEXT: movw %ax, %di
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %di, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %si, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm1
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-O0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %dx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-O0-NEXT: # implicit-def: $eax
-; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm2
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm2
-; CHECK-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; CHECK-O0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-O0-NEXT: retq
-;
; CHECK-SSE-O0-LABEL: atomic_vec4_bfloat:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O0-NEXT: movl %eax, %ecx
-; CHECK-SSE-O0-NEXT: shrl $16, %ecx
-; CHECK-SSE-O0-NEXT: # kill: def $cx killed $cx killed $ecx
-; CHECK-SSE-O0-NEXT: movw %ax, %dx
-; CHECK-SSE-O0-NEXT: movq %rax, %rsi
-; CHECK-SSE-O0-NEXT: shrq $32, %rsi
-; CHECK-SSE-O0-NEXT: # kill: def $si killed $si killed $rsi
-; CHECK-SSE-O0-NEXT: shrq $48, %rax
-; CHECK-SSE-O0-NEXT: movw %ax, %di
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %di, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %si, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm1
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK-SSE-O0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %dx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
-; CHECK-SSE-O0-NEXT: # implicit-def: $eax
-; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm2
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm2
-; CHECK-SSE-O0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; CHECK-SSE-O0-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; CHECK-SSE-O0-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec4_bfloat:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O0-NEXT: movq %rax, %rcx
-; CHECK-AVX-O0-NEXT: shrq $48, %rcx
-; CHECK-AVX-O0-NEXT: # kill: def $cx killed $cx killed $rcx
-; CHECK-AVX-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O0-NEXT: movq %rax, %rcx
-; CHECK-AVX-O0-NEXT: shrq $32, %rcx
-; CHECK-AVX-O0-NEXT: # kill: def $cx killed $cx killed $rcx
-; CHECK-AVX-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O0-NEXT: movw %ax, %cx
-; CHECK-AVX-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O0-NEXT: # kill: def $eax killed $eax killed $rax
-; CHECK-AVX-O0-NEXT: shrl $16, %eax
-; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-AVX-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX-O0-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; CHECK-AVX-O0-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O0-NEXT: retq
-;
-; CHECK-AVX512-O0-LABEL: atomic_vec4_bfloat:
-; CHECK-AVX512-O0: # %bb.0:
-; CHECK-AVX512-O0-NEXT: movq (%rdi), %rax
-; CHECK-AVX512-O0-NEXT: movq %rax, %rcx
-; CHECK-AVX512-O0-NEXT: shrq $48, %rcx
-; CHECK-AVX512-O0-NEXT: # kill: def $cx killed $cx killed $rcx
-; CHECK-AVX512-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O0-NEXT: movq %rax, %rcx
-; CHECK-AVX512-O0-NEXT: shrq $32, %rcx
-; CHECK-AVX512-O0-NEXT: # kill: def $cx killed $cx killed $rcx
-; CHECK-AVX512-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O0-NEXT: movw %ax, %cx
-; CHECK-AVX512-O0-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O0-NEXT: # kill: def $eax killed $eax killed $rax
-; CHECK-AVX512-O0-NEXT: shrl $16, %eax
-; CHECK-AVX512-O0-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-AVX512-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-AVX512-O0-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
-; CHECK-AVX512-O0-NEXT: retq
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
ret <4 x bfloat> %ret
}
@@ -1110,6 +726,82 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
ret <4 x float> %ret
}
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-SSE1-O-LABEL: atomic_vec4_float_align:
+; CHECK-SSE1-O: # %bb.0:
+; CHECK-SSE1-O-NEXT: pushq %rax
+; CHECK-SSE1-O-NEXT: movl $2, %esi
+; CHECK-SSE1-O-NEXT: callq __atomic_load_16@PLT
+; CHECK-SSE1-O-NEXT: movq %rdx, %xmm1
+; CHECK-SSE1-O-NEXT: movq %rax, %xmm0
+; CHECK-SSE1-O-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE1-O-NEXT: popq %rax
+; CHECK-SSE1-O-NEXT: retq
+;
+; CHECK-SSE1-O3-LABEL: atomic_vec4_float_align:
+; CHECK-SSE1-O3: # %bb.0:
+; CHECK-SSE1-O3-NEXT: pushq %rax
+; CHECK-SSE1-O3-NEXT: movl $2, %esi
+; CHECK-SSE1-O3-NEXT: callq __atomic_load_16@PLT
+; CHECK-SSE1-O3-NEXT: movq %rdx, %xmm1
+; CHECK-SSE1-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE1-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE1-O3-NEXT: popq %rax
+; CHECK-SSE1-O3-NEXT: retq
+;
+; CHECK-SSE2-O3-LABEL: atomic_vec4_float_align:
+; CHECK-SSE2-O3: # %bb.0:
+; CHECK-SSE2-O3-NEXT: pushq %rbx
+; CHECK-SSE2-O3-NEXT: xorl %eax, %eax
+; CHECK-SSE2-O3-NEXT: xorl %edx, %edx
+; CHECK-SSE2-O3-NEXT: xorl %ecx, %ecx
+; CHECK-SSE2-O3-NEXT: xorl %ebx, %ebx
+; CHECK-SSE2-O3-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE2-O3-NEXT: movq %rdx, %xmm1
+; CHECK-SSE2-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE2-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-O3-NEXT: popq %rbx
+; CHECK-SSE2-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE1-O0-LABEL: atomic_vec4_float_align:
+; CHECK-SSE1-O0: # %bb.0:
+; CHECK-SSE1-O0-NEXT: pushq %rax
+; CHECK-SSE1-O0-NEXT: movl $2, %esi
+; CHECK-SSE1-O0-NEXT: callq __atomic_load_16@PLT
+; CHECK-SSE1-O0-NEXT: movq %rdx, %xmm1
+; CHECK-SSE1-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE1-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE1-O0-NEXT: popq %rax
+; CHECK-SSE1-O0-NEXT: retq
+;
+; CHECK-SSE2-O0-LABEL: atomic_vec4_float_align:
+; CHECK-SSE2-O0: # %bb.0:
+; CHECK-SSE2-O0-NEXT: pushq %rbx
+; CHECK-SSE2-O0-NEXT: xorl %eax, %eax
+; CHECK-SSE2-O0-NEXT: movl %eax, %ebx
+; CHECK-SSE2-O0-NEXT: movq %rbx, %rax
+; CHECK-SSE2-O0-NEXT: movq %rbx, %rdx
+; CHECK-SSE2-O0-NEXT: movq %rbx, %rcx
+; CHECK-SSE2-O0-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE2-O0-NEXT: movq %rdx, %xmm1
+; CHECK-SSE2-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE2-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE2-O0-NEXT: popq %rbx
+; CHECK-SSE2-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x float>, ptr %x acquire, align 16
+ ret <4 x float> %ret
+}
+
define <8 x double> @atomic_vec8_double(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec8_double:
; CHECK-SSE-O3: # %bb.0:
More information about the llvm-branch-commits
mailing list