[llvm] [X86] combine-fcopysign.ll - extend test coverage to all x86-64/x86-64-v2/x86-64-v3/x86-64-v4 levels (PR #184579)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 02:16:52 PST 2026
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/184579
None
>From f9bc2453e6dda98b4fb12e9f32a44097fdb6eac9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 4 Mar 2026 10:15:45 +0000
Subject: [PATCH] [X86] combine-fcopysign.ll - extend test coverage to all x86-64/x86-64-v2/x86-64-v3/x86-64-v4 levels
---
llvm/test/CodeGen/X86/combine-fcopysign.ll | 267 +++++++++++++--------
1 file changed, 172 insertions(+), 95 deletions(-)
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index d7031be3addd9..59d45594b094a 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
;
; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
@@ -14,11 +16,16 @@ define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_pos_constant0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_pos_constant0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
ret <4 x float> %1
}
@@ -29,11 +36,16 @@ define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_pos_constant1:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_pos_constant1:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
ret <4 x float> %1
}
@@ -44,11 +56,16 @@ define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fabs_sgn:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fabs_sgn:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
%2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
ret <4 x float> %2
@@ -61,11 +78,16 @@ define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_neg_constant0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_neg_constant0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
ret <4 x float> %1
}
@@ -76,11 +98,16 @@ define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_neg_constant1:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_neg_constant1:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
ret <4 x float> %1
}
@@ -91,11 +118,16 @@ define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x flo
; SSE-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
%2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
%3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
@@ -111,14 +143,19 @@ define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fabs_mag:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vandps %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fabs_mag:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (m32bcst & (xmm0 ^ xmm1))
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
%2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
ret <4 x float> %2
@@ -133,14 +170,19 @@ define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fneg_mag:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vandps %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fneg_mag:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (m32bcst & (xmm0 ^ xmm1))
+; AVX512-NEXT: retq
%1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
%2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
ret <4 x float> %2
@@ -155,14 +197,19 @@ define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x flo
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fcopysign_mag:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vandps %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fcopysign_mag:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (m32bcst & (xmm0 ^ xmm1))
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
%2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
ret <4 x float> %2
@@ -177,14 +224,20 @@ define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x flo
; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fcopysign_sgn:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vandps %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fcopysign_sgn:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (m32bcst & (xmm0 ^ xmm1))
+; AVX512-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
%2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
ret <4 x float> %2
@@ -207,15 +260,21 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
; SSE-NEXT: orps %xmm4, %xmm1
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
-; AVX: # %bb.0:
-; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fpext_sgn:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvtps2pd %xmm1, %ymm1
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fpext_sgn:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtps2pd %xmm1, %ymm1
+; AVX512-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (m64bcst & (ymm0 ^ ymm1))
+; AVX512-NEXT: retq
%1 = fpext <4 x float> %y to <4 x double>
%2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
ret <4 x double> %2
@@ -233,16 +292,23 @@ define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x doubl
; SSE-NEXT: orpd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
-; AVX: # %bb.0:
-; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
-; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_fcopysign_fptrunc_sgn:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcvtpd2ps %ymm1, %xmm1
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX2-NEXT: vandpd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_fcopysign_fptrunc_sgn:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtpd2ps %ymm1, %xmm1
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (m32bcst & (xmm0 ^ xmm1))
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = fptrunc <4 x double> %y to <4 x float>
%2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
ret <4 x float> %2
@@ -264,18 +330,29 @@ define double @PR136368(double %x) {
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: PR136368:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN]
-; AVX-NEXT: # xmm1 = mem[0,0]
-; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
-; AVX-NEXT: vcmpltsd %xmm2, %xmm3, %xmm2
-; AVX-NEXT: vandpd %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vandnpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vorpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: PR136368:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN]
+; AVX2-NEXT: # xmm1 = mem[0,0]
+; AVX2-NEXT: vandpd %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
+; AVX2-NEXT: vcmpltsd %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vandpd %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vandnpd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vandpd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vorpd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR136368:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovddup {{.*#+}} xmm1 = [NaN,NaN]
+; AVX512-NEXT: # xmm1 = mem[0,0]
+; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vmovsd {{.*#+}} xmm3 = [1.5707963267948966E+0,0.0E+0]
+; AVX512-NEXT: vcmpltsd %xmm2, %xmm3, %k1
+; AVX512-NEXT: vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z}
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = xmm0 ^ (xmm1 & (xmm0 ^ xmm2))
+; AVX512-NEXT: retq
%fabs = tail call double @llvm.fabs.f64(double %x)
%cmp = fcmp ogt double %fabs, 0x3FF921FB54442D18
%cond = select i1 %cmp, double 0x3FF921FB54442D18, double 0.000000e+00
More information about the llvm-commits mailing list