[llvm] [X86] atomic-load-store.ll - cleanup test check-prefix hierarchies to improve reuse and fix missing AVX2/AVX512 checks (PR #165552)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 29 06:05:00 PDT 2025


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/165552

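-mcpu=x86-64 still generates SSE code, so its RUN lines can share the SSE check prefixes with -mcpu=x86-64-v2. In addition, AVX2/AVX512 checks were missing for functions where the x86-64-v3 and x86-64-v4 output differs, because both RUN lines used the same common CHECK-AVX prefix and their conflicting checks clashed.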

Noticed while reviewing #148897

From 18f3d22e0804095517209ba6f81c74650c9517e9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 29 Oct 2025 13:04:18 +0000
Subject: [PATCH] [X86] atomic-load-store.ll - cleanup test check-prefix
 hierarchies to improve reuse and fix missing AVX2/AVX512 checks

-mcpu=x86-64 is still SSE codegen, and there were missing AVX2/AVX512 checks where the common CHECK-AVX prefix clashed

Noticed while reviewing #148897
---
 llvm/test/CodeGen/X86/atomic-load-store.ll | 504 +++++----------------
 1 file changed, 106 insertions(+), 398 deletions(-)

diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3e7b73a65fe07..1173c45b4bfd8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-SSE-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX2-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3,CHECK-AVX-O3,CHECK-AVX512-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-SSE-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX2-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0,CHECK-AVX-O0,CHECK-AVX512-O0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -50,30 +50,10 @@ define <1 x i8> @atomic_vec1_i8(ptr %x) {
 ; CHECK-O3-NEXT:    movzbl (%rdi), %eax
 ; CHECK-O3-NEXT:    retq
 ;
-; CHECK-SSE-O3-LABEL: atomic_vec1_i8:
-; CHECK-SSE-O3:       # %bb.0:
-; CHECK-SSE-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-SSE-O3-NEXT:    retq
-;
-; CHECK-AVX-O3-LABEL: atomic_vec1_i8:
-; CHECK-AVX-O3:       # %bb.0:
-; CHECK-AVX-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-AVX-O3-NEXT:    retq
-;
 ; CHECK-O0-LABEL: atomic_vec1_i8:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    movb (%rdi), %al
 ; CHECK-O0-NEXT:    retq
-;
-; CHECK-SSE-O0-LABEL: atomic_vec1_i8:
-; CHECK-SSE-O0:       # %bb.0:
-; CHECK-SSE-O0-NEXT:    movb (%rdi), %al
-; CHECK-SSE-O0-NEXT:    retq
-;
-; CHECK-AVX-O0-LABEL: atomic_vec1_i8:
-; CHECK-AVX-O0:       # %bb.0:
-; CHECK-AVX-O0-NEXT:    movb (%rdi), %al
-; CHECK-AVX-O0-NEXT:    retq
   %ret = load atomic <1 x i8>, ptr %x acquire, align 1
   ret <1 x i8> %ret
 }
@@ -84,30 +64,10 @@ define <1 x i16> @atomic_vec1_i16(ptr %x) {
 ; CHECK-O3-NEXT:    movzwl (%rdi), %eax
 ; CHECK-O3-NEXT:    retq
 ;
-; CHECK-SSE-O3-LABEL: atomic_vec1_i16:
-; CHECK-SSE-O3:       # %bb.0:
-; CHECK-SSE-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT:    retq
-;
-; CHECK-AVX-O3-LABEL: atomic_vec1_i16:
-; CHECK-AVX-O3:       # %bb.0:
-; CHECK-AVX-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT:    retq
-;
 ; CHECK-O0-LABEL: atomic_vec1_i16:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    movw (%rdi), %ax
 ; CHECK-O0-NEXT:    retq
-;
-; CHECK-SSE-O0-LABEL: atomic_vec1_i16:
-; CHECK-SSE-O0:       # %bb.0:
-; CHECK-SSE-O0-NEXT:    movw (%rdi), %ax
-; CHECK-SSE-O0-NEXT:    retq
-;
-; CHECK-AVX-O0-LABEL: atomic_vec1_i16:
-; CHECK-AVX-O0:       # %bb.0:
-; CHECK-AVX-O0-NEXT:    movw (%rdi), %ax
-; CHECK-AVX-O0-NEXT:    retq
   %ret = load atomic <1 x i16>, ptr %x acquire, align 2
   ret <1 x i16> %ret
 }
@@ -119,35 +79,11 @@ define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
 ; CHECK-O3-NEXT:    movzbl %al, %eax
 ; CHECK-O3-NEXT:    retq
 ;
-; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext:
-; CHECK-SSE-O3:       # %bb.0:
-; CHECK-SSE-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-SSE-O3-NEXT:    movzbl %al, %eax
-; CHECK-SSE-O3-NEXT:    retq
-;
-; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext:
-; CHECK-AVX-O3:       # %bb.0:
-; CHECK-AVX-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-AVX-O3-NEXT:    movzbl %al, %eax
-; CHECK-AVX-O3-NEXT:    retq
-;
 ; CHECK-O0-LABEL: atomic_vec1_i8_zext:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    movb (%rdi), %al
 ; CHECK-O0-NEXT:    movzbl %al, %eax
 ; CHECK-O0-NEXT:    retq
-;
-; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext:
-; CHECK-SSE-O0:       # %bb.0:
-; CHECK-SSE-O0-NEXT:    movb (%rdi), %al
-; CHECK-SSE-O0-NEXT:    movzbl %al, %eax
-; CHECK-SSE-O0-NEXT:    retq
-;
-; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext:
-; CHECK-AVX-O0:       # %bb.0:
-; CHECK-AVX-O0-NEXT:    movb (%rdi), %al
-; CHECK-AVX-O0-NEXT:    movzbl %al, %eax
-; CHECK-AVX-O0-NEXT:    retq
   %ret = load atomic <1 x i8>, ptr %x acquire, align 1
   %zret = zext <1 x i8> %ret to <1 x i32>
   ret <1 x i32> %zret
@@ -160,35 +96,11 @@ define <1 x i64> @atomic_vec1_i16_sext(ptr %x) {
 ; CHECK-O3-NEXT:    movswq %ax, %rax
 ; CHECK-O3-NEXT:    retq
 ;
-; CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext:
-; CHECK-SSE-O3:       # %bb.0:
-; CHECK-SSE-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT:    movswq %ax, %rax
-; CHECK-SSE-O3-NEXT:    retq
-;
-; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext:
-; CHECK-AVX-O3:       # %bb.0:
-; CHECK-AVX-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT:    movswq %ax, %rax
-; CHECK-AVX-O3-NEXT:    retq
-;
 ; CHECK-O0-LABEL: atomic_vec1_i16_sext:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    movw (%rdi), %ax
 ; CHECK-O0-NEXT:    movswq %ax, %rax
 ; CHECK-O0-NEXT:    retq
-;
-; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext:
-; CHECK-SSE-O0:       # %bb.0:
-; CHECK-SSE-O0-NEXT:    movw (%rdi), %ax
-; CHECK-SSE-O0-NEXT:    movswq %ax, %rax
-; CHECK-SSE-O0-NEXT:    retq
-;
-; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext:
-; CHECK-AVX-O0:       # %bb.0:
-; CHECK-AVX-O0-NEXT:    movw (%rdi), %ax
-; CHECK-AVX-O0-NEXT:    movswq %ax, %rax
-; CHECK-AVX-O0-NEXT:    retq
   %ret = load atomic <1 x i16>, ptr %x acquire, align 2
   %sret = sext <1 x i16> %ret to <1 x i64>
   ret <1 x i64> %sret
@@ -204,12 +116,6 @@ define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
 }
 
 define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
-; CHECK-O3-LABEL: atomic_vec1_bfloat:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-O3-NEXT:    pinsrw $0, %eax, %xmm0
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    movzwl (%rdi), %eax
@@ -222,15 +128,6 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
 ; CHECK-AVX-O3-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec1_bfloat:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movw (%rdi), %cx
-; CHECK-O0-NEXT:    # implicit-def: $eax
-; CHECK-O0-NEXT:    movw %cx, %ax
-; CHECK-O0-NEXT:    # implicit-def: $xmm0
-; CHECK-O0-NEXT:    pinsrw $0, %eax, %xmm0
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    movw (%rdi), %cx
@@ -283,30 +180,6 @@ define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
 ; CHECK-O3-NEXT:    popq %rcx
 ; CHECK-O3-NEXT:    retq
 ;
-; CHECK-SSE-O3-LABEL: atomic_vec1_ptr:
-; CHECK-SSE-O3:       # %bb.0:
-; CHECK-SSE-O3-NEXT:    pushq %rax
-; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
-; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
-; CHECK-SSE-O3-NEXT:    movl $8, %edi
-; CHECK-SSE-O3-NEXT:    movl $2, %ecx
-; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-SSE-O3-NEXT:    movq (%rsp), %rax
-; CHECK-SSE-O3-NEXT:    popq %rcx
-; CHECK-SSE-O3-NEXT:    retq
-;
-; CHECK-AVX-O3-LABEL: atomic_vec1_ptr:
-; CHECK-AVX-O3:       # %bb.0:
-; CHECK-AVX-O3-NEXT:    pushq %rax
-; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
-; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
-; CHECK-AVX-O3-NEXT:    movl $8, %edi
-; CHECK-AVX-O3-NEXT:    movl $2, %ecx
-; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-AVX-O3-NEXT:    movq (%rsp), %rax
-; CHECK-AVX-O3-NEXT:    popq %rcx
-; CHECK-AVX-O3-NEXT:    retq
-;
 ; CHECK-O0-LABEL: atomic_vec1_ptr:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    pushq %rax
@@ -318,41 +191,11 @@ define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
 ; CHECK-O0-NEXT:    movq (%rsp), %rax
 ; CHECK-O0-NEXT:    popq %rcx
 ; CHECK-O0-NEXT:    retq
-;
-; CHECK-SSE-O0-LABEL: atomic_vec1_ptr:
-; CHECK-SSE-O0:       # %bb.0:
-; CHECK-SSE-O0-NEXT:    pushq %rax
-; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
-; CHECK-SSE-O0-NEXT:    movl $8, %edi
-; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
-; CHECK-SSE-O0-NEXT:    movl $2, %ecx
-; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-SSE-O0-NEXT:    movq (%rsp), %rax
-; CHECK-SSE-O0-NEXT:    popq %rcx
-; CHECK-SSE-O0-NEXT:    retq
-;
-; CHECK-AVX-O0-LABEL: atomic_vec1_ptr:
-; CHECK-AVX-O0:       # %bb.0:
-; CHECK-AVX-O0-NEXT:    pushq %rax
-; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
-; CHECK-AVX-O0-NEXT:    movl $8, %edi
-; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
-; CHECK-AVX-O0-NEXT:    movl $2, %ecx
-; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-AVX-O0-NEXT:    movq (%rsp), %rax
-; CHECK-AVX-O0-NEXT:    popq %rcx
-; CHECK-AVX-O0-NEXT:    retq
   %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
   ret <1 x ptr> %ret
 }
 
 define <1 x half> @atomic_vec1_half(ptr %x) {
-; CHECK-O3-LABEL: atomic_vec1_half:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzwl (%rdi), %eax
-; CHECK-O3-NEXT:    pinsrw $0, %eax, %xmm0
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec1_half:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    movzwl (%rdi), %eax
@@ -365,15 +208,6 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK-AVX-O3-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec1_half:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movw (%rdi), %cx
-; CHECK-O0-NEXT:    # implicit-def: $eax
-; CHECK-O0-NEXT:    movw %cx, %ax
-; CHECK-O0-NEXT:    # implicit-def: $xmm0
-; CHECK-O0-NEXT:    pinsrw $0, %eax, %xmm0
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec1_half:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    movw (%rdi), %cx
@@ -396,11 +230,6 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
 }
 
 define <1 x float> @atomic_vec1_float(ptr %x) {
-; CHECK-O3-LABEL: atomic_vec1_float:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec1_float:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -411,11 +240,6 @@ define <1 x float> @atomic_vec1_float(ptr %x) {
 ; CHECK-AVX-O3-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec1_float:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec1_float:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -430,11 +254,6 @@ define <1 x float> @atomic_vec1_float(ptr %x) {
 }
 
 define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec1_double_align:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec1_double_align:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
@@ -445,11 +264,6 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
 ; CHECK-AVX-O3-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec1_double_align:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec1_double_align:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
@@ -476,30 +290,6 @@ define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
 ; CHECK-O3-NEXT:    popq %rcx
 ; CHECK-O3-NEXT:    retq
 ;
-; CHECK-SSE-O3-LABEL: atomic_vec1_i64:
-; CHECK-SSE-O3:       # %bb.0:
-; CHECK-SSE-O3-NEXT:    pushq %rax
-; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
-; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
-; CHECK-SSE-O3-NEXT:    movl $8, %edi
-; CHECK-SSE-O3-NEXT:    movl $2, %ecx
-; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-SSE-O3-NEXT:    movq (%rsp), %rax
-; CHECK-SSE-O3-NEXT:    popq %rcx
-; CHECK-SSE-O3-NEXT:    retq
-;
-; CHECK-AVX-O3-LABEL: atomic_vec1_i64:
-; CHECK-AVX-O3:       # %bb.0:
-; CHECK-AVX-O3-NEXT:    pushq %rax
-; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
-; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
-; CHECK-AVX-O3-NEXT:    movl $8, %edi
-; CHECK-AVX-O3-NEXT:    movl $2, %ecx
-; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-AVX-O3-NEXT:    movq (%rsp), %rax
-; CHECK-AVX-O3-NEXT:    popq %rcx
-; CHECK-AVX-O3-NEXT:    retq
-;
 ; CHECK-O0-LABEL: atomic_vec1_i64:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    pushq %rax
@@ -511,47 +301,11 @@ define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
 ; CHECK-O0-NEXT:    movq (%rsp), %rax
 ; CHECK-O0-NEXT:    popq %rcx
 ; CHECK-O0-NEXT:    retq
-;
-; CHECK-SSE-O0-LABEL: atomic_vec1_i64:
-; CHECK-SSE-O0:       # %bb.0:
-; CHECK-SSE-O0-NEXT:    pushq %rax
-; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
-; CHECK-SSE-O0-NEXT:    movl $8, %edi
-; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
-; CHECK-SSE-O0-NEXT:    movl $2, %ecx
-; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-SSE-O0-NEXT:    movq (%rsp), %rax
-; CHECK-SSE-O0-NEXT:    popq %rcx
-; CHECK-SSE-O0-NEXT:    retq
-;
-; CHECK-AVX-O0-LABEL: atomic_vec1_i64:
-; CHECK-AVX-O0:       # %bb.0:
-; CHECK-AVX-O0-NEXT:    pushq %rax
-; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
-; CHECK-AVX-O0-NEXT:    movl $8, %edi
-; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
-; CHECK-AVX-O0-NEXT:    movl $2, %ecx
-; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-AVX-O0-NEXT:    movq (%rsp), %rax
-; CHECK-AVX-O0-NEXT:    popq %rcx
-; CHECK-AVX-O0-NEXT:    retq
   %ret = load atomic <1 x i64>, ptr %x acquire, align 4
   ret <1 x i64> %ret
 }
 
 define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec1_double:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $8, %edi
-; CHECK-O3-NEXT:    movl $2, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-O3-NEXT:    popq %rax
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec1_double:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    pushq %rax
@@ -576,18 +330,6 @@ define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
 ; CHECK-AVX-O3-NEXT:    popq %rax
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec1_double:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $8, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    movl $2, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-O0-NEXT:    popq %rax
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec1_double:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    pushq %rax
@@ -616,18 +358,6 @@ define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
 }
 
 define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec2_i32:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $8, %edi
-; CHECK-O3-NEXT:    movl $2, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-O3-NEXT:    popq %rax
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec2_i32:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    pushq %rax
@@ -652,18 +382,6 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
 ; CHECK-AVX-O3-NEXT:    popq %rax
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec2_i32:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $8, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    movl $2, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-O0-NEXT:    popq %rax
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec2_i32:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    pushq %rax
@@ -692,18 +410,6 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
 }
 
 define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec4_float:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    subq $24, %rsp
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $16, %edi
-; CHECK-O3-NEXT:    movl $2, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
-; CHECK-O3-NEXT:    addq $24, %rsp
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec4_float:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    subq $24, %rsp
@@ -728,18 +434,6 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
 ; CHECK-AVX-O3-NEXT:    addq $24, %rsp
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec4_float:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    subq $24, %rsp
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $16, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    movl $2, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movaps (%rsp), %xmm0
-; CHECK-O0-NEXT:    addq $24, %rsp
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec4_float:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    subq $24, %rsp
@@ -768,21 +462,6 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
 }
 
 define <8 x double> @atomic_vec8_double(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec8_double:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    subq $72, %rsp
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $64, %edi
-; CHECK-O3-NEXT:    movl $2, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
-; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
-; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
-; CHECK-O3-NEXT:    addq $72, %rsp
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec8_double:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    subq $72, %rsp
@@ -798,20 +477,30 @@ define <8 x double> @atomic_vec8_double(ptr %x) nounwind {
 ; CHECK-SSE-O3-NEXT:    addq $72, %rsp
 ; CHECK-SSE-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec8_double:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    subq $72, %rsp
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $64, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    movl $2, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movapd (%rsp), %xmm0
-; CHECK-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
-; CHECK-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
-; CHECK-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
-; CHECK-O0-NEXT:    addq $72, %rsp
-; CHECK-O0-NEXT:    retq
+; CHECK-AVX2-O3-LABEL: atomic_vec8_double:
+; CHECK-AVX2-O3:       # %bb.0:
+; CHECK-AVX2-O3-NEXT:    subq $72, %rsp
+; CHECK-AVX2-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX2-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX2-O3-NEXT:    movl $64, %edi
+; CHECK-AVX2-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX2-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX2-O3-NEXT:    vmovups (%rsp), %ymm0
+; CHECK-AVX2-O3-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
+; CHECK-AVX2-O3-NEXT:    addq $72, %rsp
+; CHECK-AVX2-O3-NEXT:    retq
+;
+; CHECK-AVX512-O3-LABEL: atomic_vec8_double:
+; CHECK-AVX512-O3:       # %bb.0:
+; CHECK-AVX512-O3-NEXT:    subq $72, %rsp
+; CHECK-AVX512-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX512-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX512-O3-NEXT:    movl $64, %edi
+; CHECK-AVX512-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX512-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX512-O3-NEXT:    vmovups (%rsp), %zmm0
+; CHECK-AVX512-O3-NEXT:    addq $72, %rsp
+; CHECK-AVX512-O3-NEXT:    retq
 ;
 ; CHECK-SSE-O0-LABEL: atomic_vec8_double:
 ; CHECK-SSE-O0:       # %bb.0:
@@ -827,24 +516,36 @@ define <8 x double> @atomic_vec8_double(ptr %x) nounwind {
 ; CHECK-SSE-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
 ; CHECK-SSE-O0-NEXT:    addq $72, %rsp
 ; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX2-O0-LABEL: atomic_vec8_double:
+; CHECK-AVX2-O0:       # %bb.0:
+; CHECK-AVX2-O0-NEXT:    subq $72, %rsp
+; CHECK-AVX2-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX2-O0-NEXT:    movl $64, %edi
+; CHECK-AVX2-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX2-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX2-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX2-O0-NEXT:    vmovupd (%rsp), %ymm0
+; CHECK-AVX2-O0-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm1
+; CHECK-AVX2-O0-NEXT:    addq $72, %rsp
+; CHECK-AVX2-O0-NEXT:    retq
+;
+; CHECK-AVX512-O0-LABEL: atomic_vec8_double:
+; CHECK-AVX512-O0:       # %bb.0:
+; CHECK-AVX512-O0-NEXT:    subq $72, %rsp
+; CHECK-AVX512-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX512-O0-NEXT:    movl $64, %edi
+; CHECK-AVX512-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX512-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX512-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX512-O0-NEXT:    vmovupd (%rsp), %zmm0
+; CHECK-AVX512-O0-NEXT:    addq $72, %rsp
+; CHECK-AVX512-O0-NEXT:    retq
   %ret = load atomic <8 x double>, ptr %x acquire, align 4
   ret <8 x double> %ret
 }
 
 define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec16_bfloat:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    subq $40, %rsp
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $32, %edi
-; CHECK-O3-NEXT:    movl $2, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
-; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-O3-NEXT:    addq $40, %rsp
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    subq $40, %rsp
@@ -870,19 +571,6 @@ define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind {
 ; CHECK-AVX-O3-NEXT:    addq $40, %rsp
 ; CHECK-AVX-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec16_bfloat:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    subq $40, %rsp
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $32, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    movl $2, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movaps (%rsp), %xmm0
-; CHECK-O0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-O0-NEXT:    addq $40, %rsp
-; CHECK-O0-NEXT:    retq
-;
 ; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat:
 ; CHECK-SSE-O0:       # %bb.0:
 ; CHECK-SSE-O0-NEXT:    subq $40, %rsp
@@ -912,21 +600,6 @@ define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind {
 }
 
 define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
-; CHECK-O3-LABEL: atomic_vec32_half:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    subq $72, %rsp
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $64, %edi
-; CHECK-O3-NEXT:    movl $2, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
-; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
-; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
-; CHECK-O3-NEXT:    addq $72, %rsp
-; CHECK-O3-NEXT:    retq
-;
 ; CHECK-SSE-O3-LABEL: atomic_vec32_half:
 ; CHECK-SSE-O3:       # %bb.0:
 ; CHECK-SSE-O3-NEXT:    subq $72, %rsp
@@ -942,20 +615,30 @@ define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
 ; CHECK-SSE-O3-NEXT:    addq $72, %rsp
 ; CHECK-SSE-O3-NEXT:    retq
 ;
-; CHECK-O0-LABEL: atomic_vec32_half:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    subq $72, %rsp
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $64, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    movl $2, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movaps (%rsp), %xmm0
-; CHECK-O0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-O0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
-; CHECK-O0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
-; CHECK-O0-NEXT:    addq $72, %rsp
-; CHECK-O0-NEXT:    retq
+; CHECK-AVX2-O3-LABEL: atomic_vec32_half:
+; CHECK-AVX2-O3:       # %bb.0:
+; CHECK-AVX2-O3-NEXT:    subq $72, %rsp
+; CHECK-AVX2-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX2-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX2-O3-NEXT:    movl $64, %edi
+; CHECK-AVX2-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX2-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX2-O3-NEXT:    vmovups (%rsp), %ymm0
+; CHECK-AVX2-O3-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
+; CHECK-AVX2-O3-NEXT:    addq $72, %rsp
+; CHECK-AVX2-O3-NEXT:    retq
+;
+; CHECK-AVX512-O3-LABEL: atomic_vec32_half:
+; CHECK-AVX512-O3:       # %bb.0:
+; CHECK-AVX512-O3-NEXT:    subq $72, %rsp
+; CHECK-AVX512-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX512-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX512-O3-NEXT:    movl $64, %edi
+; CHECK-AVX512-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX512-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX512-O3-NEXT:    vmovups (%rsp), %zmm0
+; CHECK-AVX512-O3-NEXT:    addq $72, %rsp
+; CHECK-AVX512-O3-NEXT:    retq
 ;
 ; CHECK-SSE-O0-LABEL: atomic_vec32_half:
 ; CHECK-SSE-O0:       # %bb.0:
@@ -971,6 +654,31 @@ define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
 ; CHECK-SSE-O0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
 ; CHECK-SSE-O0-NEXT:    addq $72, %rsp
 ; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX2-O0-LABEL: atomic_vec32_half:
+; CHECK-AVX2-O0:       # %bb.0:
+; CHECK-AVX2-O0-NEXT:    subq $72, %rsp
+; CHECK-AVX2-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX2-O0-NEXT:    movl $64, %edi
+; CHECK-AVX2-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX2-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX2-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX2-O0-NEXT:    vmovups (%rsp), %ymm0
+; CHECK-AVX2-O0-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
+; CHECK-AVX2-O0-NEXT:    addq $72, %rsp
+; CHECK-AVX2-O0-NEXT:    retq
+;
+; CHECK-AVX512-O0-LABEL: atomic_vec32_half:
+; CHECK-AVX512-O0:       # %bb.0:
+; CHECK-AVX512-O0-NEXT:    subq $72, %rsp
+; CHECK-AVX512-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX512-O0-NEXT:    movl $64, %edi
+; CHECK-AVX512-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX512-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX512-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX512-O0-NEXT:    vmovups (%rsp), %zmm0
+; CHECK-AVX512-O0-NEXT:    addq $72, %rsp
+; CHECK-AVX512-O0-NEXT:    retq
   %ret = load atomic <32 x half>, ptr %x acquire, align 4
   ret <32 x half> %ret
 }



More information about the llvm-commits mailing list