[llvm] r341219 - AMDGPU: Restrict extract_vector_elt combine to loads

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 31 08:39:52 PDT 2018


Author: arsenm
Date: Fri Aug 31 08:39:52 2018
New Revision: 341219

URL: http://llvm.org/viewvc/llvm-project?rev=341219&view=rev
Log:
AMDGPU: Restrict extract_vector_elt combine to loads

The intention is to enable the extract_vector_elt load combine
only for loads, since applying it to other operations interferes
with more useful optimizations on vectors.

Handle any type of load since in principle we should do the
same combine for the various load intrinsics.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/fexp.ll
    llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
    llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
    llvm/trunk/test/CodeGen/AMDGPU/reduction.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=341219&r1=341218&r2=341219&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Aug 31 08:39:52 2018
@@ -7941,7 +7941,8 @@ SDValue SITargetLowering::performExtract
   // elements. This exposes more load reduction opportunities by replacing
   // multiple small extract_vector_elements with a single 32-bit extract.
   auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (EltSize <= 16 &&
+  if (isa<MemSDNode>(Vec) &&
+      EltSize <= 16 &&
       EltVT.isByteSized() &&
       VecSize > 32 &&
       VecSize % 32 == 0 &&

Modified: llvm/trunk/test/CodeGen/AMDGPU/fexp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fexp.ll?rev=341219&r1=341218&r2=341219&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fexp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fexp.ll Fri Aug 31 08:39:52 2018
@@ -224,37 +224,23 @@ define <4 x half> @v_exp_v4f16(<4 x half
 ; SI-LABEL: v_exp_v4f16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NEXT:    v_mov_b32_e32 v4, 0x3fb8aa3b
-; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NEXT:    v_mul_f32_e32 v3, v3, v4
-; SI-NEXT:    v_mul_f32_e32 v2, v2, v4
-; SI-NEXT:    v_mul_f32_e32 v1, v1, v4
-; SI-NEXT:    v_mul_f32_e32 v0, v0, v4
-; SI-NEXT:    v_exp_f32_e32 v3, v3
-; SI-NEXT:    v_exp_f32_e32 v2, v2
-; SI-NEXT:    v_exp_f32_e32 v1, v1
-; SI-NEXT:    v_exp_f32_e32 v0, v0
 ; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3fb8aa3b
 ; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_mul_f32_e32 v0, v0, v4
+; SI-NEXT:    v_mul_f32_e32 v1, v1, v4
+; SI-NEXT:    v_mul_f32_e32 v2, v2, v4
+; SI-NEXT:    v_mul_f32_e32 v3, v3, v4
+; SI-NEXT:    v_exp_f32_e32 v0, v0
+; SI-NEXT:    v_exp_f32_e32 v1, v1
+; SI-NEXT:    v_exp_f32_e32 v2, v2
+; SI-NEXT:    v_exp_f32_e32 v3, v3
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_exp_v4f16:

Modified: llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll?rev=341219&r1=341218&r2=341219&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll Fri Aug 31 08:39:52 2018
@@ -290,79 +290,51 @@ define <4 x half> @test_fmax_legacy_ugt_
 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
 ; SI-SAFE:       ; %bb.0:
 ; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-SAFE-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
 ; SI-NNAN:       ; %bb.0:
 ; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v7
-; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v6
-; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v5
-; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v4
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-NNAN-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v4
+; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v5
+; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v6
+; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v7
 ; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp ugt <4 x half> %a, %b
   %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
@@ -474,147 +446,91 @@ define <8 x half> @test_fmax_legacy_ugt_
 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
 ; SI-SAFE:       ; %bb.0:
 ; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v15, v15
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_or_b32_e32 v7, v6, v7
-; SI-SAFE-NEXT:    v_or_b32_e32 v5, v4, v5
-; SI-SAFE-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-SAFE-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v5
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v7
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
 ; SI-NNAN:       ; %bb.0:
 ; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
-; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
-; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
-; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
-; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
-; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
-; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
-; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_or_b32_e32 v7, v6, v7
-; SI-NNAN-NEXT:    v_or_b32_e32 v5, v4, v5
-; SI-NNAN-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-NNAN-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v5
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v7
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
+; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
+; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
+; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
+; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
+; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
+; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
+; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
 ; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp ugt <8 x half> %a, %b
   %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b

Modified: llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll?rev=341219&r1=341218&r2=341219&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll Fri Aug 31 08:39:52 2018
@@ -291,79 +291,51 @@ define <4 x half> @test_fmin_legacy_ule_
 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
 ; SI-SAFE:       ; %bb.0:
 ; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v3, v7, v3
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v2, v6, v2
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v1, v5, v1
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, v4, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-SAFE-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, v4, v0
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v1, v5, v1
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v2, v6, v2
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v3, v7, v3
 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v4f16:
 ; SI-NNAN:       ; %bb.0:
 ; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NNAN-NEXT:    v_min_f32_e32 v3, v3, v7
-; SI-NNAN-NEXT:    v_min_f32_e32 v2, v2, v6
-; SI-NNAN-NEXT:    v_min_f32_e32 v1, v1, v5
-; SI-NNAN-NEXT:    v_min_f32_e32 v0, v0, v4
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-NNAN-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NNAN-NEXT:    v_min_f32_e32 v0, v0, v4
+; SI-NNAN-NEXT:    v_min_f32_e32 v1, v1, v5
+; SI-NNAN-NEXT:    v_min_f32_e32 v2, v2, v6
+; SI-NNAN-NEXT:    v_min_f32_e32 v3, v3, v7
 ; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp ule <4 x half> %a, %b
   %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
@@ -475,147 +447,91 @@ define <8 x half> @test_fmin_legacy_ule_
 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
 ; SI-SAFE:       ; %bb.0:
 ; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v15, v15
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v7, v15, v7
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v6, v14, v6
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v5, v13, v5
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v4, v12, v4
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v3, v11, v3
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v2, v10, v2
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v1, v9, v1
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, v8, v0
-; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_or_b32_e32 v7, v6, v7
-; SI-SAFE-NEXT:    v_or_b32_e32 v5, v4, v5
-; SI-SAFE-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-SAFE-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v5
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v7
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; SI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, v8, v0
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v1, v9, v1
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v2, v10, v2
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v3, v11, v3
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v4, v12, v4
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v5, v13, v5
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v6, v14, v6
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v7, v15, v7
 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-NNAN-LABEL: test_fmin_legacy_ule_v8f16:
 ; SI-NNAN:       ; %bb.0:
 ; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v15, v15
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
-; SI-NNAN-NEXT:    v_min_f32_e32 v7, v7, v15
-; SI-NNAN-NEXT:    v_min_f32_e32 v6, v6, v14
-; SI-NNAN-NEXT:    v_min_f32_e32 v5, v5, v13
-; SI-NNAN-NEXT:    v_min_f32_e32 v4, v4, v12
-; SI-NNAN-NEXT:    v_min_f32_e32 v3, v3, v11
-; SI-NNAN-NEXT:    v_min_f32_e32 v2, v2, v10
-; SI-NNAN-NEXT:    v_min_f32_e32 v1, v1, v9
-; SI-NNAN-NEXT:    v_min_f32_e32 v0, v0, v8
-; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
 ; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_or_b32_e32 v7, v6, v7
-; SI-NNAN-NEXT:    v_or_b32_e32 v5, v4, v5
-; SI-NNAN-NEXT:    v_or_b32_e32 v3, v2, v3
-; SI-NNAN-NEXT:    v_or_b32_e32 v1, v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v5
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v7
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; SI-NNAN-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NNAN-NEXT:    v_min_f32_e32 v0, v0, v8
+; SI-NNAN-NEXT:    v_min_f32_e32 v1, v1, v9
+; SI-NNAN-NEXT:    v_min_f32_e32 v2, v2, v10
+; SI-NNAN-NEXT:    v_min_f32_e32 v3, v3, v11
+; SI-NNAN-NEXT:    v_min_f32_e32 v4, v4, v12
+; SI-NNAN-NEXT:    v_min_f32_e32 v5, v5, v13
+; SI-NNAN-NEXT:    v_min_f32_e32 v6, v6, v14
+; SI-NNAN-NEXT:    v_min_f32_e32 v7, v7, v15
 ; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
   %cmp = fcmp ule <8 x half> %a, %b
   %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b

Modified: llvm/trunk/test/CodeGen/AMDGPU/reduction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reduction.ll?rev=341219&r1=341218&r2=341219&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reduction.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/reduction.ll Fri Aug 31 08:39:52 2018
@@ -47,8 +47,8 @@ entry:
 
 ; VI: s_waitcnt
 ; VI-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: v_sub_f16_e32 v0, v0, v1
-; VI-NEXT: v_sub_f16_e32 v0, v2, v0
+; VI-NEXT: v_sub_f16_e32 v0, v1, v0
+; VI-NEXT: v_add_f16_e32 v0, v2, v0
 ; VI-NEXT: s_setpc_b64
 define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) {
 entry:




More information about the llvm-commits mailing list