[llvm] r332920 - [DAG] fold FP binops with undef operands to NaN
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon May 21 16:54:19 PDT 2018
Author: spatel
Date: Mon May 21 16:54:19 2018
New Revision: 332920
URL: http://llvm.org/viewvc/llvm-project?rev=332920&view=rev
Log:
[DAG] fold FP binops with undef operands to NaN
This is the FP sibling of D43141 with the corresponding IR change in rL327212.
We can't propagate undef here because if a variable operand is a NaN, these
binops must propagate NaN. Neither global nor node-level fast-math makes a
difference. If we have 'nnan', I think later folds can turn the NaN into undef.
The tests in X86/fp-undef.ll are meant to be the definitive verification for
these folds - everything reduces identically now.
The other test changes are collateral damage. They may need to be altered to
preserve their intent.
Differential Revision: https://reviews.llvm.org/D47026
Removed:
llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll
llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll
llvm/trunk/test/CodeGen/X86/fp-undef.ll
llvm/trunk/test/CodeGen/X86/pr23103.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=332920&r1=332919&r2=332920&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon May 21 16:54:19 2018
@@ -4767,6 +4767,18 @@ SDValue SelectionDAG::getNode(unsigned O
}
}
+ // Any FP binop with an undef operand is folded to NaN. This matches the
+ // behavior of the IR optimizer.
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
@@ -4776,9 +4788,6 @@ SDValue SelectionDAG::getNode(unsigned O
case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
- case ISD::FSUB:
- case ISD::FDIV:
- case ISD::FREM:
return getUNDEF(VT); // fold op(undef, arg2) -> undef
case ISD::UDIV:
case ISD::SDIV:
@@ -4813,14 +4822,6 @@ SDValue SelectionDAG::getNode(unsigned O
case ISD::SRL:
case ISD::SHL:
return getUNDEF(VT); // fold op(arg1, undef) -> undef
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
- if (getTarget().Options.UnsafeFPMath)
- return N2;
- break;
case ISD::MUL:
case ISD::AND:
return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
Modified: llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll?rev=332920&r1=332919&r2=332920&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll Mon May 21 16:54:19 2018
@@ -100,9 +100,8 @@ define <2 x i32> @test9(<2 x float> %f)
ret <2 x i32> %vcvt.i
}
-; Don't combine all undefs.
+; Combine all undefs.
; CHECK-LABEL: test10
-; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: ret
define <2 x i32> @test10(<2 x float> %f) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll?rev=332920&r1=332919&r2=332920&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll Mon May 21 16:54:19 2018
@@ -145,10 +145,13 @@ define <2 x half> @v_mad_mix_v2f32_clamp
; FIXME: Should be packed into 2 registers per argument?
; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt:
; GCN: s_waitcnt
-; GFX9-NEXT: v_mad_mixlo_f16 v2, v2, v5, v8 op_sel_hi:[1,1,1] clamp
-; GFX9-NEXT: v_mad_mixhi_f16 v2, v0, v0, v0 clamp
+; GFX9-NEXT: v_mad_mixlo_f16 v2, v2, v5, v8 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v3, v6 op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: s_movk_i32 s6, 0x7e00
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9-NEXT: v_lshl_or_b32 v2, s6, 16, v2
; GFX9-NEXT: v_mad_mixhi_f16 v0, v1, v4, v7 op_sel_hi:[1,1,1] clamp
+; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 clamp
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX9-NEXT: s_setpc_b64
define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
Removed: llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll?rev=332919&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll (removed)
@@ -1,9 +0,0 @@
-; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 -asm-verbose=1 | FileCheck %s
-
-; CHECK: // implicit-def: %f[[F0:[0-9]+]]
-; CHECK: add.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f[[F0]];
-define float @foo(float %a) {
- %ret = fadd float %a, undef
- ret float %ret
-}
-
Modified: llvm/trunk/test/CodeGen/X86/fp-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-undef.ll?rev=332920&r1=332919&r2=332920&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-undef.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-undef.ll Mon May 21 16:54:19 2018
@@ -6,27 +6,19 @@
; adding something here, you should probably add it there too.
define float @fadd_undef_op0(float %x) {
-; STRICT-LABEL: fadd_undef_op0:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op0:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op0:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fadd float undef, %x
ret float %r
}
define float @fadd_undef_op1(float %x) {
-; STRICT-LABEL: fadd_undef_op1:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op1:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op1:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fadd float %x, undef
ret float %r
}
@@ -34,46 +26,35 @@ define float @fadd_undef_op1(float %x) {
define float @fsub_undef_op0(float %x) {
; ANY-LABEL: fsub_undef_op0:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = fsub float undef, %x
ret float %r
}
define float @fsub_undef_op1(float %x) {
-; STRICT-LABEL: fsub_undef_op1:
-; STRICT: # %bb.0:
-; STRICT-NEXT: subss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fsub_undef_op1:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fsub_undef_op1:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fsub float %x, undef
ret float %r
}
define float @fmul_undef_op0(float %x) {
-; STRICT-LABEL: fmul_undef_op0:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op0:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op0:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fmul float undef, %x
ret float %r
}
define float @fmul_undef_op1(float %x) {
-; STRICT-LABEL: fmul_undef_op1:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op1:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op1:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fmul float %x, undef
ret float %r
}
@@ -81,20 +62,17 @@ define float @fmul_undef_op1(float %x) {
define float @fdiv_undef_op0(float %x) {
; ANY-LABEL: fdiv_undef_op0:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = fdiv float undef, %x
ret float %r
}
define float @fdiv_undef_op1(float %x) {
-; STRICT-LABEL: fdiv_undef_op1:
-; STRICT: # %bb.0:
-; STRICT-NEXT: divss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fdiv_undef_op1:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fdiv_undef_op1:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fdiv float %x, undef
ret float %r
}
@@ -102,19 +80,17 @@ define float @fdiv_undef_op1(float %x) {
define float @frem_undef_op0(float %x) {
; ANY-LABEL: frem_undef_op0:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = frem float undef, %x
ret float %r
}
define float @frem_undef_op1(float %x) {
-; STRICT-LABEL: frem_undef_op1:
-; STRICT: # %bb.0:
-; STRICT-NEXT: jmp fmodf # TAILCALL
-;
-; UNSAFE-LABEL: frem_undef_op1:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: frem_undef_op1:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = frem float %x, undef
ret float %r
}
@@ -122,27 +98,19 @@ define float @frem_undef_op1(float %x) {
; Repeat all tests with fast-math-flags. Alternate 'nnan' and 'fast' for more coverage.
define float @fadd_undef_op0_nnan(float %x) {
-; STRICT-LABEL: fadd_undef_op0_nnan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op0_nnan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op0_nnan:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fadd nnan float undef, %x
ret float %r
}
define float @fadd_undef_op1_fast(float %x) {
-; STRICT-LABEL: fadd_undef_op1_fast:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op1_fast:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op1_fast:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fadd fast float %x, undef
ret float %r
}
@@ -150,46 +118,35 @@ define float @fadd_undef_op1_fast(float
define float @fsub_undef_op0_fast(float %x) {
; ANY-LABEL: fsub_undef_op0_fast:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = fsub fast float undef, %x
ret float %r
}
define float @fsub_undef_op1_nnan(float %x) {
-; STRICT-LABEL: fsub_undef_op1_nnan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: subss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fsub_undef_op1_nnan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fsub_undef_op1_nnan:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fsub nnan float %x, undef
ret float %r
}
define float @fmul_undef_op0_nnan(float %x) {
-; STRICT-LABEL: fmul_undef_op0_nnan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op0_nnan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op0_nnan:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fmul nnan float undef, %x
ret float %r
}
define float @fmul_undef_op1_fast(float %x) {
-; STRICT-LABEL: fmul_undef_op1_fast:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op1_fast:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op1_fast:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fmul fast float %x, undef
ret float %r
}
@@ -197,20 +154,17 @@ define float @fmul_undef_op1_fast(float
define float @fdiv_undef_op0_fast(float %x) {
; ANY-LABEL: fdiv_undef_op0_fast:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = fdiv fast float undef, %x
ret float %r
}
define float @fdiv_undef_op1_nnan(float %x) {
-; STRICT-LABEL: fdiv_undef_op1_nnan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: divss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fdiv_undef_op1_nnan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fdiv_undef_op1_nnan:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fdiv nnan float %x, undef
ret float %r
}
@@ -218,19 +172,17 @@ define float @fdiv_undef_op1_nnan(float
define float @frem_undef_op0_nnan(float %x) {
; ANY-LABEL: frem_undef_op0_nnan:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = frem nnan float undef, %x
ret float %r
}
define float @frem_undef_op1_fast(float %x) {
-; STRICT-LABEL: frem_undef_op1_fast:
-; STRICT: # %bb.0:
-; STRICT-NEXT: jmp fmodf # TAILCALL
-;
-; UNSAFE-LABEL: frem_undef_op1_fast:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: frem_undef_op1_fast:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = frem fast float %x, undef
ret float %r
}
@@ -238,14 +190,10 @@ define float @frem_undef_op1_fast(float
; Constant folding - undef undef.
define double @fadd_undef_undef(double %x) {
-; STRICT-LABEL: fadd_undef_undef:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addsd %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_undef:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_undef:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fadd double undef, undef
ret double %r
}
@@ -253,20 +201,17 @@ define double @fadd_undef_undef(double %
define double @fsub_undef_undef(double %x) {
; ANY-LABEL: fsub_undef_undef:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = fsub double undef, undef
ret double %r
}
define double @fmul_undef_undef(double %x) {
-; STRICT-LABEL: fmul_undef_undef:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulsd %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_undef:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_undef:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fmul double undef, undef
ret double %r
}
@@ -274,6 +219,7 @@ define double @fmul_undef_undef(double %
define double @fdiv_undef_undef(double %x) {
; ANY-LABEL: fdiv_undef_undef:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = fdiv double undef, undef
ret double %r
@@ -282,6 +228,7 @@ define double @fdiv_undef_undef(double %
define double @frem_undef_undef(double %x) {
; ANY-LABEL: frem_undef_undef:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = frem double undef, undef
ret double %r
@@ -290,27 +237,19 @@ define double @frem_undef_undef(double %
; Constant folding.
define float @fadd_undef_op0_nnan_constant(float %x) {
-; STRICT-LABEL: fadd_undef_op0_nnan_constant:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addss {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op0_nnan_constant:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op0_nnan_constant:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fadd nnan float undef, 1.0
ret float %r
}
define float @fadd_undef_op1_constant(float %x) {
-; STRICT-LABEL: fadd_undef_op1_constant:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addss {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op1_constant:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op1_constant:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fadd float 2.0, undef
ret float %r
}
@@ -318,47 +257,35 @@ define float @fadd_undef_op1_constant(fl
define float @fsub_undef_op0_fast_constant(float %x) {
; ANY-LABEL: fsub_undef_op0_fast_constant:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = fsub fast float undef, 3.0
ret float %r
}
define float @fsub_undef_op1_constant(float %x) {
-; STRICT-LABEL: fsub_undef_op1_constant:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; STRICT-NEXT: subss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fsub_undef_op1_constant:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fsub_undef_op1_constant:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fsub float 4.0, undef
ret float %r
}
define float @fmul_undef_op0_nnan_constant(float %x) {
-; STRICT-LABEL: fmul_undef_op0_nnan_constant:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulss {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op0_nnan_constant:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op0_nnan_constant:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fmul nnan float undef, 5.0
ret float %r
}
define float @fmul_undef_op1_constant(float %x) {
-; STRICT-LABEL: fmul_undef_op1_constant:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulss {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op1_constant:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op1_constant:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fmul float 6.0, undef
ret float %r
}
@@ -366,21 +293,17 @@ define float @fmul_undef_op1_constant(fl
define float @fdiv_undef_op0_fast_constant(float %x) {
; ANY-LABEL: fdiv_undef_op0_fast_constant:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = fdiv fast float undef, 7.0
ret float %r
}
define float @fdiv_undef_op1_constant(float %x) {
-; STRICT-LABEL: fdiv_undef_op1_constant:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; STRICT-NEXT: divss %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fdiv_undef_op1_constant:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fdiv_undef_op1_constant:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = fdiv float 8.0, undef
ret float %r
}
@@ -388,20 +311,17 @@ define float @fdiv_undef_op1_constant(fl
define float @frem_undef_op0_nnan_constant(float %x) {
; ANY-LABEL: frem_undef_op0_nnan_constant:
; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ANY-NEXT: retq
%r = frem nnan float undef, 9.0
ret float %r
}
define float @frem_undef_op1_constant(float %x) {
-; STRICT-LABEL: frem_undef_op1_constant:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; STRICT-NEXT: jmp fmodf # TAILCALL
-;
-; UNSAFE-LABEL: frem_undef_op1_constant:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: frem_undef_op1_constant:
+; ANY: # %bb.0:
+; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ANY-NEXT: retq
%r = frem float 10.0, undef
ret float %r
}
@@ -409,27 +329,19 @@ define float @frem_undef_op1_constant(fl
; Constant folding - special constants: NaN.
define double @fadd_undef_op0_constant_nan(double %x) {
-; STRICT-LABEL: fadd_undef_op0_constant_nan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op0_constant_nan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op0_constant_nan:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fadd double undef, 0x7FF8000000000000
ret double %r
}
define double @fadd_undef_op1_fast_constant_nan(double %x) {
-; STRICT-LABEL: fadd_undef_op1_fast_constant_nan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op1_fast_constant_nan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op1_fast_constant_nan:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fadd fast double 0xFFF0000000000001, undef
ret double %r
}
@@ -437,47 +349,35 @@ define double @fadd_undef_op1_fast_const
define double @fsub_undef_op0_constant_nan(double %x) {
; ANY-LABEL: fsub_undef_op0_constant_nan:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = fsub double undef, 0xFFF8000000000010
ret double %r
}
define double @fsub_undef_op1_nnan_constant_nan(double %x) {
-; STRICT-LABEL: fsub_undef_op1_nnan_constant_nan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; STRICT-NEXT: subsd %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fsub_undef_op1_nnan_constant_nan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fsub_undef_op1_nnan_constant_nan:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fsub nnan double 0x7FF0000000000011, undef
ret double %r
}
define double @fmul_undef_op0_constant_nan(double %x) {
-; STRICT-LABEL: fmul_undef_op0_constant_nan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op0_constant_nan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op0_constant_nan:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fmul double undef, 0x7FF8000000000100
ret double %r
}
define double @fmul_undef_op1_fast_constant_nan(double %x) {
-; STRICT-LABEL: fmul_undef_op1_fast_constant_nan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op1_fast_constant_nan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op1_fast_constant_nan:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fmul fast double 0xFFF0000000000101, undef
ret double %r
}
@@ -485,21 +385,17 @@ define double @fmul_undef_op1_fast_const
define double @fdiv_undef_op0_constant_nan(double %x) {
; ANY-LABEL: fdiv_undef_op0_constant_nan:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = fdiv double undef, 0xFFF8000000000110
ret double %r
}
define double @fdiv_undef_op1_nnan_constant_nan(double %x) {
-; STRICT-LABEL: fdiv_undef_op1_nnan_constant_nan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; STRICT-NEXT: divsd %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fdiv_undef_op1_nnan_constant_nan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fdiv_undef_op1_nnan_constant_nan:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fdiv nnan double 0x7FF0000000000111, undef
ret double %r
}
@@ -507,20 +403,17 @@ define double @fdiv_undef_op1_nnan_const
define double @frem_undef_op0_constant_nan(double %x) {
; ANY-LABEL: frem_undef_op0_constant_nan:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = frem double undef, 0x7FF8000000001000
ret double %r
}
define double @frem_undef_op1_fast_constant_nan(double %x) {
-; STRICT-LABEL: frem_undef_op1_fast_constant_nan:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; STRICT-NEXT: jmp fmod # TAILCALL
-;
-; UNSAFE-LABEL: frem_undef_op1_fast_constant_nan:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: frem_undef_op1_fast_constant_nan:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = frem fast double 0xFFF0000000001001, undef
ret double %r
}
@@ -528,27 +421,19 @@ define double @frem_undef_op1_fast_const
; Constant folding - special constants: Inf.
define double @fadd_undef_op0_constant_inf(double %x) {
-; STRICT-LABEL: fadd_undef_op0_constant_inf:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op0_constant_inf:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op0_constant_inf:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fadd double undef, 0x7FF0000000000000
ret double %r
}
define double @fadd_undef_op1_fast_constant_inf(double %x) {
-; STRICT-LABEL: fadd_undef_op1_fast_constant_inf:
-; STRICT: # %bb.0:
-; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fadd_undef_op1_fast_constant_inf:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fadd_undef_op1_fast_constant_inf:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fadd fast double 0xFFF0000000000000, undef
ret double %r
}
@@ -556,47 +441,35 @@ define double @fadd_undef_op1_fast_const
define double @fsub_undef_op0_constant_inf(double %x) {
; ANY-LABEL: fsub_undef_op0_constant_inf:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = fsub double undef, 0xFFF0000000000000
ret double %r
}
define double @fsub_undef_op1_ninf_constant_inf(double %x) {
-; STRICT-LABEL: fsub_undef_op1_ninf_constant_inf:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; STRICT-NEXT: subsd %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fsub_undef_op1_ninf_constant_inf:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fsub_undef_op1_ninf_constant_inf:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fsub ninf double 0x7FF0000000000000, undef
ret double %r
}
define double @fmul_undef_op0_constant_inf(double %x) {
-; STRICT-LABEL: fmul_undef_op0_constant_inf:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op0_constant_inf:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op0_constant_inf:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fmul double undef, 0x7FF0000000000000
ret double %r
}
define double @fmul_undef_op1_fast_constant_inf(double %x) {
-; STRICT-LABEL: fmul_undef_op1_fast_constant_inf:
-; STRICT: # %bb.0:
-; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fmul_undef_op1_fast_constant_inf:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fmul_undef_op1_fast_constant_inf:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fmul fast double 0xFFF0000000000000, undef
ret double %r
}
@@ -604,21 +477,17 @@ define double @fmul_undef_op1_fast_const
define double @fdiv_undef_op0_constant_inf(double %x) {
; ANY-LABEL: fdiv_undef_op0_constant_inf:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = fdiv double undef, 0xFFF0000000000000
ret double %r
}
define double @fdiv_undef_op1_ninf_constant_inf(double %x) {
-; STRICT-LABEL: fdiv_undef_op1_ninf_constant_inf:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; STRICT-NEXT: divsd %xmm0, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: fdiv_undef_op1_ninf_constant_inf:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: fdiv_undef_op1_ninf_constant_inf:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = fdiv ninf double 0x7FF0000000000000, undef
ret double %r
}
@@ -626,20 +495,17 @@ define double @fdiv_undef_op1_ninf_const
define double @frem_undef_op0_constant_inf(double %x) {
; ANY-LABEL: frem_undef_op0_constant_inf:
; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; ANY-NEXT: retq
%r = frem double undef, 0x7FF0000000000000
ret double %r
}
define double @frem_undef_op1_fast_constant_inf(double %x) {
-; STRICT-LABEL: frem_undef_op1_fast_constant_inf:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; STRICT-NEXT: jmp fmod # TAILCALL
-;
-; UNSAFE-LABEL: frem_undef_op1_fast_constant_inf:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: retq
+; ANY-LABEL: frem_undef_op1_fast_constant_inf:
+; ANY: # %bb.0:
+; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; ANY-NEXT: retq
%r = frem fast double 0xFFF0000000000000, undef
ret double %r
}
Modified: llvm/trunk/test/CodeGen/X86/pr23103.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr23103.ll?rev=332920&r1=332919&r2=332920&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr23103.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr23103.ll Mon May 21 16:54:19 2018
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx < %s | FileCheck %s
; When commuting a VADDSDrr instruction, verify that the 'IsUndef' flag is
@@ -8,11 +9,15 @@ declare zeroext i1 @foo(<1 x double>)
define <1 x double> @pr23103(<1 x double>* align 8 %Vp) {
; CHECK-LABEL: pr23103:
-; CHECK: vmovsd (%rdi), %xmm0
-; CHECK-NEXT: vmovsd %xmm0, {{.*}}(%rsp) {{.*#+}} 8-byte Spill
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq foo
-; CHECK-NEXT: vaddsd {{.*}}(%rsp), %xmm0, %xmm0 {{.*#+}} 8-byte Folded Reload
-; CHECK: retq
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%V = load <1 x double>, <1 x double>* %Vp, align 8
%call = call zeroext i1 @foo(<1 x double> %V)
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll?rev=332920&r1=332919&r2=332920&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll Mon May 21 16:54:19 2018
@@ -755,31 +755,26 @@ define float @test_v16f32_zero(<16 x flo
define float @test_v2f32_undef(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: addss %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: addss %xmm1, %xmm0
+; SSE2-NEXT: addss {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: addss %xmm0, %xmm0
-; SSE41-NEXT: addss %xmm1, %xmm0
+; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: addss {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
@@ -789,23 +784,19 @@ define float @test_v4f32_undef(<4 x floa
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: addss %xmm0, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; SSE2-NEXT: addss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm2
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE2-NEXT: addss %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: addss %xmm1, %xmm0
+; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: addss %xmm0, %xmm1
-; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE41-NEXT: addss %xmm2, %xmm1
+; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: addss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm2
@@ -815,9 +806,8 @@ define float @test_v4f32_undef(<4 x floa
;
; AVX-LABEL: test_v4f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm1
-; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
@@ -826,9 +816,8 @@ define float @test_v4f32_undef(<4 x floa
;
; AVX512-LABEL: test_v4f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
@@ -842,15 +831,13 @@ define float @test_v8f32_undef(<8 x floa
; SSE2-LABEL: test_v8f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: addss %xmm0, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE2-NEXT: addss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
+; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE2-NEXT: addss %xmm2, %xmm3
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
-; SSE2-NEXT: addss %xmm3, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: addss %xmm2, %xmm0
+; SSE2-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
@@ -864,10 +851,8 @@ define float @test_v8f32_undef(<8 x floa
;
; SSE41-LABEL: test_v8f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: addss %xmm0, %xmm2
-; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: addss %xmm3, %xmm2
+; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT: addss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE41-NEXT: addss %xmm2, %xmm3
@@ -885,9 +870,8 @@ define float @test_v8f32_undef(<8 x floa
;
; AVX-LABEL: test_v8f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm1
-; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
@@ -905,9 +889,8 @@ define float @test_v8f32_undef(<8 x floa
;
; AVX512-LABEL: test_v8f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
@@ -930,15 +913,13 @@ define float @test_v16f32_undef(<16 x fl
; SSE2-LABEL: test_v16f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm4
-; SSE2-NEXT: addss %xmm0, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
+; SSE2-NEXT: addss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3]
+; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE2-NEXT: addss %xmm4, %xmm5
-; SSE2-NEXT: movaps %xmm0, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
-; SSE2-NEXT: addss %xmm5, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: addss %xmm4, %xmm0
+; SSE2-NEXT: addss %xmm5, %xmm0
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
@@ -970,10 +951,8 @@ define float @test_v16f32_undef(<16 x fl
;
; SSE41-LABEL: test_v16f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm4
-; SSE41-NEXT: addss %xmm0, %xmm4
-; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; SSE41-NEXT: addss %xmm5, %xmm4
+; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: addss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE41-NEXT: addss %xmm4, %xmm5
@@ -1007,9 +986,8 @@ define float @test_v16f32_undef(<16 x fl
;
; AVX-LABEL: test_v16f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
@@ -1042,9 +1020,8 @@ define float @test_v16f32_undef(<16 x fl
;
; AVX512-LABEL: test_v16f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
@@ -1629,24 +1606,20 @@ define double @test_v16f64_zero(<16 x do
define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: addsd %xmm1, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
@@ -1655,10 +1628,8 @@ define double @test_v2f64_undef(<2 x dou
define double @test_v4f64_undef(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm2
-; SSE-NEXT: addsd %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: addsd %xmm2, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: addsd %xmm1, %xmm0
@@ -1666,9 +1637,8 @@ define double @test_v4f64_undef(<4 x dou
;
; AVX-LABEL: test_v4f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm1
-; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1678,9 +1648,8 @@ define double @test_v4f64_undef(<4 x dou
;
; AVX512-LABEL: test_v4f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1694,10 +1663,8 @@ define double @test_v4f64_undef(<4 x dou
define double @test_v8f64_undef(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm4
-; SSE-NEXT: addsd %xmm0, %xmm4
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: addsd %xmm4, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: addsd %xmm1, %xmm0
@@ -1711,9 +1678,8 @@ define double @test_v8f64_undef(<8 x dou
;
; AVX-LABEL: test_v8f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1730,9 +1696,8 @@ define double @test_v8f64_undef(<8 x dou
;
; AVX512-LABEL: test_v8f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
@@ -1754,10 +1719,8 @@ define double @test_v8f64_undef(<8 x dou
define double @test_v16f64_undef(<16 x double> %a0) {
; SSE-LABEL: test_v16f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm8
-; SSE-NEXT: addsd %xmm0, %xmm8
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: addsd %xmm8, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: addsd %xmm1, %xmm0
@@ -1783,9 +1746,8 @@ define double @test_v16f64_undef(<16 x d
;
; AVX-LABEL: test_v16f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm4
-; AVX-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
-; AVX-NEXT: vaddsd %xmm5, %xmm4, %xmm4
+; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm4, %xmm4
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm4, %xmm4
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1816,9 +1778,8 @@ define double @test_v16f64_undef(<16 x d
;
; AVX512-LABEL: test_v16f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm2
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm2, %xmm2
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll?rev=332920&r1=332919&r2=332920&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll Mon May 21 16:54:19 2018
@@ -647,31 +647,26 @@ define float @test_v16f32_one(<16 x floa
define float @test_v2f32_undef(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: mulss %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: mulss %xmm1, %xmm0
+; SSE2-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: mulss %xmm0, %xmm0
-; SSE41-NEXT: mulss %xmm1, %xmm0
+; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
@@ -681,23 +676,19 @@ define float @test_v4f32_undef(<4 x floa
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: mulss %xmm0, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE2-NEXT: mulss %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: mulss %xmm1, %xmm0
+; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: mulss %xmm0, %xmm1
-; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE41-NEXT: mulss %xmm2, %xmm1
+; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
@@ -707,9 +698,8 @@ define float @test_v4f32_undef(<4 x floa
;
; AVX-LABEL: test_v4f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm1
-; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
@@ -718,9 +708,8 @@ define float @test_v4f32_undef(<4 x floa
;
; AVX512-LABEL: test_v4f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
@@ -734,15 +723,13 @@ define float @test_v8f32_undef(<8 x floa
; SSE2-LABEL: test_v8f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: mulss %xmm0, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
+; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
-; SSE2-NEXT: mulss %xmm3, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: mulss %xmm2, %xmm0
+; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
@@ -756,10 +743,8 @@ define float @test_v8f32_undef(<8 x floa
;
; SSE41-LABEL: test_v8f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: mulss %xmm0, %xmm2
-; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: mulss %xmm3, %xmm2
+; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
@@ -777,9 +762,8 @@ define float @test_v8f32_undef(<8 x floa
;
; AVX-LABEL: test_v8f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm1
-; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
@@ -797,9 +781,8 @@ define float @test_v8f32_undef(<8 x floa
;
; AVX512-LABEL: test_v8f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
@@ -822,15 +805,13 @@ define float @test_v16f32_undef(<16 x fl
; SSE2-LABEL: test_v16f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm4
-; SSE2-NEXT: mulss %xmm0, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
+; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3]
+; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
-; SSE2-NEXT: movaps %xmm0, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
-; SSE2-NEXT: mulss %xmm5, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: mulss %xmm4, %xmm0
+; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
@@ -862,10 +843,8 @@ define float @test_v16f32_undef(<16 x fl
;
; SSE41-LABEL: test_v16f32_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm4
-; SSE41-NEXT: mulss %xmm0, %xmm4
-; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; SSE41-NEXT: mulss %xmm5, %xmm4
+; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
@@ -899,9 +878,8 @@ define float @test_v16f32_undef(<16 x fl
;
; AVX-LABEL: test_v16f32_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
@@ -934,9 +912,8 @@ define float @test_v16f32_undef(<16 x fl
;
; AVX512-LABEL: test_v16f32_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
@@ -1426,24 +1403,20 @@ define double @test_v16f64_one(<16 x dou
define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: mulsd %xmm1, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
@@ -1452,10 +1425,8 @@ define double @test_v2f64_undef(<2 x dou
define double @test_v4f64_undef(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm2
-; SSE-NEXT: mulsd %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: mulsd %xmm2, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
@@ -1463,9 +1434,8 @@ define double @test_v4f64_undef(<4 x dou
;
; AVX-LABEL: test_v4f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm1
-; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX-NEXT: vmulsd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1475,9 +1445,8 @@ define double @test_v4f64_undef(<4 x dou
;
; AVX512-LABEL: test_v4f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1491,10 +1460,8 @@ define double @test_v4f64_undef(<4 x dou
define double @test_v8f64_undef(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm4
-; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: mulsd %xmm4, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
@@ -1508,9 +1475,8 @@ define double @test_v8f64_undef(<8 x dou
;
; AVX-LABEL: test_v8f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1527,9 +1493,8 @@ define double @test_v8f64_undef(<8 x dou
;
; AVX512-LABEL: test_v8f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
@@ -1551,10 +1516,8 @@ define double @test_v8f64_undef(<8 x dou
define double @test_v16f64_undef(<16 x double> %a0) {
; SSE-LABEL: test_v16f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movapd %xmm0, %xmm8
-; SSE-NEXT: mulsd %xmm0, %xmm8
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: mulsd %xmm8, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: mulsd %xmm1, %xmm0
@@ -1580,9 +1543,8 @@ define double @test_v16f64_undef(<16 x d
;
; AVX-LABEL: test_v16f64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm4
-; AVX-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
-; AVX-NEXT: vmulsd %xmm5, %xmm4, %xmm4
+; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm4, %xmm4
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm4
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
@@ -1613,9 +1575,8 @@ define double @test_v16f64_undef(<16 x d
;
; AVX512-LABEL: test_v16f64_undef:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm2
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
More information about the llvm-commits
mailing list