[llvm] f229bf2 - [Legalize][X86] Improve nnan fmin/fmax vector reduction
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 27 01:50:36 PDT 2020
Author: Nikita Popov
Date: 2020-09-27T10:47:35+02:00
New Revision: f229bf2e12461be55446e6b08ccb931308586031
URL: https://github.com/llvm/llvm-project/commit/f229bf2e12461be55446e6b08ccb931308586031
DIFF: https://github.com/llvm/llvm-project/commit/f229bf2e12461be55446e6b08ccb931308586031.diff
LOG: [Legalize][X86] Improve nnan fmin/fmax vector reduction
Use +/-Inf or +/-Largest as the neutral element for nnan fmin/fmax
reductions. This avoids dropping any fast-math flags. Preserving the
nnan flag in particular is important to get a good lowering on X86.
Differential Revision: https://reviews.llvm.org/D87586
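A minimal standalone C++ sketch of the new neutral-element selection
(illustrative only; fminNeutral is a made-up helper, not an LLVM API):

  #include <cfloat>
  #include <limits>

  // Without nnan, a NaN lane is required: minnum treats NaN as missing
  // data, so any other padding value could change the result. With
  // nnan, +Inf is neutral (fmin(x, +Inf) == x for any non-NaN x).
  // With nnan+ninf, inputs cannot be Inf, so FLT_MAX is neutral.
  // For an fmax reduction the sign of the value is flipped.
  float fminNeutral(bool HasNoNaNs, bool HasNoInfs) {
    if (!HasNoNaNs)
      return std::numeric_limits<float>::quiet_NaN();
    if (!HasNoInfs)
      return std::numeric_limits<float>::infinity();
    return FLT_MAX;
  }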
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index e67717b81e4e..b1952225ca10 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4794,20 +4794,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
case ISD::VECREDUCE_FMUL:
NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
break;
- case ISD::VECREDUCE_FMAX:
- // This has maxnum semantics, so NaN represents missing data. We must clear
- // 'nnan' if it was set because the NaN would be a poison value.
- NeutralElem = DAG.getConstantFP(
- std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
- Flags.setNoNaNs(false);
- break;
case ISD::VECREDUCE_FMIN:
- // This has minnum semantics, so NaN represents missing data. We must clear
- // 'nnan' if it was set because the NaN would be a poison value.
- NeutralElem = DAG.getConstantFP(
- std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
- Flags.setNoNaNs(false);
- break;
+ case ISD::VECREDUCE_FMAX: {
+ // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+ const fltSemantics &Semantics = DAG.EVTToAPFloatSemantics(ElemVT);
+ APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) :
+ !Flags.hasNoInfs() ? APFloat::getInf(Semantics) :
+ APFloat::getLargest(Semantics);
+ if (N->getOpcode() == ISD::VECREDUCE_FMAX)
+ NeutralAF.changeSign();
+
+ NeutralElem = DAG.getConstantFP(NeutralAF, dl, ElemVT);
+ }
}
// Pad the vector with the neutral element.
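The immediates in the AArch64 test changes below are the bit patterns
of these neutral elements. A small sketch to verify them (assumes
32-bit IEEE floats; bits() is a local helper, not an LLVM function):

  #include <cfloat>
  #include <cstdint>
  #include <cstdio>
  #include <cstring>
  #include <limits>

  static int32_t bits(float F) {
    int32_t I;
    std::memcpy(&I, &F, sizeof(I)); // type-pun without UB
    return I;
  }

  int main() {
    // 2143289344 (0x7FC00000): quiet NaN (on typical targets), the
    // old neutral element for both fmin and fmax.
    std::printf("%d\n", bits(std::numeric_limits<float>::quiet_NaN()));
    // -8388608 (0xFF800000): -Inf, the new nnan fmax neutral element.
    std::printf("%d\n", bits(-std::numeric_limits<float>::infinity()));
    // -8388609 (0xFF7FFFFF): -FLT_MAX, the nnan+ninf fmax neutral.
    std::printf("%d\n", bits(-FLT_MAX));
    // 2139095040 (0x7F800000): +Inf, the new nnan fmin neutral element.
    std::printf("%d\n", bits(std::numeric_limits<float>::infinity()));
    // 2139095039 (0x7F7FFFFF): FLT_MAX, the nnan+ninf fmin neutral.
    std::printf("%d\n", bits(FLT_MAX));
    return 0;
  }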
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 5fd7116e9068..89cee4f0a060 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2143289344
+; CHECK-NEXT: mov w8, #-8388608
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fmaxnmv s0, v0.4s
@@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_ninf:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2143289344
+; CHECK-NEXT: mov w8, #-8388609
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fmaxnmv s0, v0.4s
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 7a37c0d047a1..bb2d6b75bcd2 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2143289344
+; CHECK-NEXT: mov w8, #2139095040
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fminnmv s0, v0.4s
@@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_ninf:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2143289344
+; CHECK-NEXT: mov w8, #2139095039
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fminnmv s0, v0.4s
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index dd3378411ecc..f4539c572375 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -13,46 +13,27 @@
define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: cmpunordss %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: andps %xmm2, %xmm3
-; SSE2-NEXT: maxss %xmm0, %xmm2
-; SSE2-NEXT: andnps %xmm2, %xmm1
-; SSE2-NEXT: orps %xmm3, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: cmpunordss %xmm0, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: andps %xmm2, %xmm3
-; SSE41-NEXT: maxss %xmm0, %xmm2
-; SSE41-NEXT: andnps %xmm2, %xmm1
-; SSE41-NEXT: orps %xmm3, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm0
+; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1
-; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0)
ret float %1
@@ -302,65 +283,37 @@ define double @test_v3f64(<3 x double> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],mem[1]
-; SSE2-NEXT: movapd %xmm2, %xmm1
-; SSE2-NEXT: maxpd %xmm0, %xmm1
-; SSE2-NEXT: cmpunordpd %xmm0, %xmm0
-; SSE2-NEXT: andpd %xmm0, %xmm2
-; SSE2-NEXT: andnpd %xmm1, %xmm0
-; SSE2-NEXT: orpd %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; SSE2-NEXT: maxpd %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: cmpunordsd %xmm0, %xmm1
-; SSE2-NEXT: movapd %xmm1, %xmm3
-; SSE2-NEXT: andpd %xmm2, %xmm3
-; SSE2-NEXT: maxsd %xmm0, %xmm2
-; SSE2-NEXT: andnpd %xmm2, %xmm1
-; SSE2-NEXT: orpd %xmm3, %xmm1
-; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE2-NEXT: maxsd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v3f64:
; SSE41: # %bb.0:
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],mem[1]
-; SSE41-NEXT: movapd %xmm2, %xmm1
-; SSE41-NEXT: maxpd %xmm0, %xmm1
-; SSE41-NEXT: cmpunordpd %xmm0, %xmm0
-; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
-; SSE41-NEXT: movapd %xmm1, %xmm2
-; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
-; SSE41-NEXT: movapd %xmm1, %xmm0
-; SSE41-NEXT: cmpunordsd %xmm1, %xmm0
-; SSE41-NEXT: movapd %xmm0, %xmm3
-; SSE41-NEXT: andpd %xmm2, %xmm3
-; SSE41-NEXT: maxsd %xmm1, %xmm2
-; SSE41-NEXT: andnpd %xmm2, %xmm0
-; SSE41-NEXT: orpd %xmm3, %xmm0
+; SSE41-NEXT: maxpd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm0, %xmm1
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE41-NEXT: maxsd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v3f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3
-; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v3f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
-; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512-NEXT: vcmpunordsd %xmm2, %xmm2, %k1
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm0
-; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double> %a0)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index 4354463dfdc2..5846f588581d 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -21,46 +21,27 @@ define float @test_v1f32(<1 x float> %a0) {
define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: cmpunordss %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: andps %xmm2, %xmm3
-; SSE2-NEXT: minss %xmm0, %xmm2
-; SSE2-NEXT: andnps %xmm2, %xmm1
-; SSE2-NEXT: orps %xmm3, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: cmpunordss %xmm0, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: andps %xmm2, %xmm3
-; SSE41-NEXT: minss %xmm0, %xmm2
-; SSE41-NEXT: andnps %xmm2, %xmm1
-; SSE41-NEXT: orps %xmm3, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm0
+; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1
-; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0)
ret float %1
@@ -72,20 +53,9 @@ define float @test_v3f32(<3 x float> %a0) {
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: cmpunordss %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: andps %xmm2, %xmm3
-; SSE2-NEXT: minss %xmm0, %xmm2
-; SSE2-NEXT: andnps %xmm2, %xmm1
-; SSE2-NEXT: orps %xmm3, %xmm1
+; SSE2-NEXT: minss %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: minss %xmm1, %xmm2
-; SSE2-NEXT: cmpunordss %xmm1, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: andnps %xmm2, %xmm3
-; SSE2-NEXT: andps %xmm0, %xmm1
-; SSE2-NEXT: orps %xmm3, %xmm1
+; SSE2-NEXT: minss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -93,45 +63,26 @@ define float @test_v3f32(<3 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: cmpunordss %xmm0, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: andps %xmm2, %xmm3
-; SSE41-NEXT: minss %xmm0, %xmm2
-; SSE41-NEXT: andnps %xmm2, %xmm1
-; SSE41-NEXT: orps %xmm3, %xmm1
+; SSE41-NEXT: minss %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: minss %xmm1, %xmm2
-; SSE41-NEXT: cmpunordss %xmm1, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: andnps %xmm2, %xmm3
-; SSE41-NEXT: andps %xmm0, %xmm1
-; SSE41-NEXT: orps %xmm3, %xmm1
+; SSE41-NEXT: minss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v3f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3
-; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
-; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v3f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm2
-; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512-NEXT: vcmpunordss %xmm2, %xmm2, %k1
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminss %xmm2, %xmm1, %xmm0
-; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a0)
ret float %1
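The cmpunord/blend sequences removed above existed only to give the
SSE min/max instructions NaN-aware minnum/maxnum semantics; with the
nnan flag preserved they are unnecessary, and a bare minss/maxss per
step suffices. A scalar sketch of the instruction semantics (a
simplified model for illustration, not compiler code):

  // x86 maxss/minss return the second source operand whenever the
  // comparison is unordered, i.e. when either input is NaN. Under
  // nnan that case cannot occur, so the single instruction already
  // computes the reduction step.
  float maxss(float Dst, float Src) {
    return Dst > Src ? Dst : Src; // NaN compares false -> Src
  }
  float minss(float Dst, float Src) {
    return Dst < Src ? Dst : Src; // NaN compares false -> Src
  }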