[llvm] r258612 - AMDGPU: Remove more unused intrinsics

Fri Jan 22 21:42:39 PST 2016

Author: arsenm
Date: Fri Jan 22 23:42:38 2016
New Revision: 258612

URL: http://llvm.org/viewvc/llvm-project?rev=258612&view=rev
Log:
AMDGPU: Remove more unused intrinsics

Replace uses of the lrp intrinsic in tests with the equivalent basic IR expansion

Removed:
    llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
    llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll
    llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
    llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Jan 22 23:42:38 2016
@@ -917,17 +917,11 @@ SDValue AMDGPUTargetLowering::LowerINTRI
 
   switch (IntrinsicID) {
     default: return Op;
-    case AMDGPUIntrinsic::AMDGPU_lrp:
-      return LowerIntrinsicLRP(Op, DAG);
-
     case AMDGPUIntrinsic::AMDGPU_clamp:
     case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
       return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
                          Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
 
-    case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
-      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
-
     case Intrinsic::AMDGPU_rsq_clamped:
       assert(Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS);
       return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
@@ -983,23 +977,6 @@ SDValue AMDGPUTargetLowering::LowerINTRI
   }
 }
 
-/// Linear Interpolation
-/// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
-SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
-                                                SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  EVT VT = Op.getValueType();
-  // TODO: Should this propagate fast-math-flags?
-  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
-                                DAG.getConstantFP(1.0f, DL, MVT::f32),
-                                Op.getOperand(1));
-  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
-                                                    Op.getOperand(3));
-  return DAG.getNode(ISD::FADD, DL, VT,
-      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
-      OneSubAC);
-}
-
 /// \brief Generate Min/Max node
 SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
                                                    EVT VT,

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Jan 22 23:42:38 2016
@@ -176,7 +176,6 @@ public:
                           SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
 
-  SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
   SDValue CombineFMinMaxLegacy(SDLoc DL,
                                EVT VT,
                                SDValue LHS,

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td Fri Jan 22 23:42:38 2016
@@ -12,34 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 let TargetPrefix = "AMDGPU", isTarget = 1 in {
-
-  def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
-  def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-  def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
   def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 
-  // This is named backwards (instead of rsq_legacy) so we don't have
-  // to define it with the public builtins intrinsics. This is a
-  // workaround for how intrinsic names are parsed. If the name is
-  // llvm.AMDGPU.rsq.legacy, the parser assumes that you meant
-  // llvm.AMDGPU.rsq.{f32 | f64} and incorrectly mangled the name.
-  def int_AMDGPU_legacy_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
   def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
   def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
   def int_AMDGPU_kilp : Intrinsic<[], [], []>;
-  def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-  def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
   def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Fri Jan 22 23:42:38 2016
@@ -615,12 +615,6 @@ SDValue R600TargetLowering::LowerOperati
     unsigned IntrinsicID =
                          cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
     switch (IntrinsicID) {
-    case AMDGPUIntrinsic::AMDGPU_store_output: {
-      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
-      MFI->LiveOuts.push_back(Reg);
-      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
-    }
     case AMDGPUIntrinsic::R600_store_swizzle: {
       SDLoc DL(Op);
       const SDValue Args[8] = {

Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Fri Jan 22 23:42:38 2016
@@ -160,7 +160,8 @@ class R600_2OP <bits<11> inst, string op
   let Inst{63-32} = Word1;
 }
 
-class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+class R600_2OP_Helper <bits<11> inst, string opName,
+                       SDPatternOperator node = null_frag,
                        InstrItinClass itin = AnyALU> :
     R600_2OP <inst, opName,
               [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
@@ -678,7 +679,7 @@ let Predicates = [isR600toCayman] in {
 
 def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
 // Non-IEEE MUL: 0 * anything = 0
-def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE">;
 def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
 // TODO: Do these actually match the regular fmin/fmax behavior?
 def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
@@ -1136,11 +1137,6 @@ def FNEG_R600 : FNEG<R600_Reg32>;
 // FIXME: Should be predicated on unsafe fp math.
 multiclass DIV_Common <InstR600 recip_ieee> {
 def : Pat<
-  (int_AMDGPU_div f32:$src0, f32:$src1),
-  (MUL_IEEE $src0, (recip_ieee $src1))
->;
-
-def : Pat<
   (fdiv f32:$src0, f32:$src1),
   (MUL_IEEE $src0, (recip_ieee $src1))
 >;

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Fri Jan 22 23:42:38 2016
@@ -1450,7 +1450,7 @@ defm V_SUBREV_F32 : VOP2Inst <vop2<0x5,
 let isCommutable = 1 in {
 
 defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
-  VOP_F32_F32_F32, int_AMDGPU_mul
+  VOP_F32_F32_F32
 >;
 
 defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
@@ -2049,13 +2049,6 @@ def SI_CONSTDATA_PTR : InstSI <
 
 let Predicates = [isGCN] in {
 
-def : Pat<
-  (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
-  (V_CNDMASK_B32_e64 $src2, $src1,
-                     (V_CMP_GT_F32_e64 SRCMODS.NONE, 0, SRCMODS.NONE, $src0,
-                                       DSTCLAMP.NONE, DSTOMOD.NONE))
->;
-
 def : Pat <
   (int_AMDGPU_kilp),
   (SI_KILL 0xbf800000)
@@ -2713,11 +2706,6 @@ def : Pat <
 def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
 
 def : Pat <
-  (int_AMDGPU_div f32:$src0, f32:$src1),
-  (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
->;
-
-def : Pat <
   (int_AMDGPU_cube v4f32:$src),
   (REG_SEQUENCE VReg_128,
     (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),

Modified: llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll Fri Jan 22 23:42:38 2016
@@ -1,9 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=cedar
+; RUN: llc -march=r600 -mcpu=cedar < %s
 
-;This test ensures that R600 backend can handle ifcvt properly
-;and do not generate ALU clauses with more than 128 instructions.
+; This test ensures that R600 backend can handle ifcvt properly
+; and do not generate ALU clauses with more than 128 instructions.
 
-define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #1 {
 main_body:
   %0 = extractelement <4 x float> %reg0, i32 0
   %1 = extractelement <4 x float> %reg0, i32 1
@@ -149,10 +149,10 @@ IF137:
   %137 = bitcast float %136 to i32
   br label %LOOP
 
-ENDIF136:                                         ; preds = %main_body, %ENDIF154
-  %temp68.1 = phi float [ %600, %ENDIF154 ], [ 0.000000e+00, %main_body ]
-  %temp69.0 = phi float [ %602, %ENDIF154 ], [ 0.000000e+00, %main_body ]
-  %temp70.0 = phi float [ %604, %ENDIF154 ], [ 1.000000e+00, %main_body ]
+ENDIF136:                                         ; preds = %ENDIF154, %main_body
+  %temp68.1 = phi float [ %591, %ENDIF154 ], [ 0.000000e+00, %main_body ]
+  %temp69.0 = phi float [ %593, %ENDIF154 ], [ 0.000000e+00, %main_body ]
+  %temp70.0 = phi float [ %595, %ENDIF154 ], [ 1.000000e+00, %main_body ]
   %138 = fmul float %26, 0x3F847AE140000000
   %139 = fmul float %27, 0x3F847AE140000000
   %140 = fmul float %28, 0x3F847AE140000000
@@ -217,957 +217,1088 @@ ENDIF136:
   %199 = fmul float 0xBE5EFB4CC0000000, %182
   %200 = fmul float %199, %182
   %201 = call float @llvm.AMDIL.exp.(float %200)
-  %202 = call float @llvm.AMDGPU.lrp(float %201, float %198, float 0x3FA99999A0000000)
-  %203 = fadd float %202, 0x3FF4CCCCC0000000
-  %204 = fmul float %203, 0x3FE1C71C80000000
-  %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00)
-  %206 = fadd float %202, 0x3FF4CCCCC0000000
-  %207 = fmul float %206, 0x3FE1C71C80000000
-  %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00)
-  %209 = fadd float %202, 2.000000e+00
-  %210 = fmul float %209, 0x3FD611A7A0000000
-  %211 = call float @llvm.AMDIL.clamp.(float %210, float 0.000000e+00, float 1.000000e+00)
-  %212 = fmul float 2.000000e+00, %205
-  %213 = fsub float -0.000000e+00, %212
-  %214 = fadd float 3.000000e+00, %213
-  %215 = fmul float %205, %214
-  %216 = fmul float %205, %215
-  %217 = fmul float 2.000000e+00, %208
-  %218 = fsub float -0.000000e+00, %217
-  %219 = fadd float 3.000000e+00, %218
-  %220 = fmul float %208, %219
-  %221 = fmul float %208, %220
-  %222 = fmul float 2.000000e+00, %211
-  %223 = fsub float -0.000000e+00, %222
-  %224 = fadd float 3.000000e+00, %223
-  %225 = fmul float %211, %224
-  %226 = fmul float %211, %225
-  %227 = fmul float %26, 0x3F368B5CC0000000
-  %228 = fmul float %27, 0x3F368B5CC0000000
-  %229 = insertelement <4 x float> undef, float %227, i32 0
-  %230 = insertelement <4 x float> %229, float %228, i32 1
-  %231 = insertelement <4 x float> %230, float 0.000000e+00, i32 2
-  %232 = insertelement <4 x float> %231, float 0.000000e+00, i32 3
-  %233 = extractelement <4 x float> %232, i32 0
-  %234 = extractelement <4 x float> %232, i32 1
-  %235 = insertelement <4 x float> undef, float %233, i32 0
-  %236 = insertelement <4 x float> %235, float %234, i32 1
-  %237 = insertelement <4 x float> %236, float undef, i32 2
-  %238 = insertelement <4 x float> %237, float undef, i32 3
-  %239 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %238, i32 17, i32 1, i32 2)
-  %240 = extractelement <4 x float> %239, i32 0
-  %241 = insertelement <4 x float> undef, float %240, i32 0
-  %242 = insertelement <4 x float> %241, float %228, i32 1
-  %243 = insertelement <4 x float> %242, float 0.000000e+00, i32 2
-  %244 = insertelement <4 x float> %243, float 0.000000e+00, i32 3
-  %245 = extractelement <4 x float> %244, i32 0
-  %246 = insertelement <4 x float> undef, float %245, i32 0
-  %247 = insertelement <4 x float> %246, float undef, i32 1
-  %248 = insertelement <4 x float> %247, float undef, i32 2
-  %249 = insertelement <4 x float> %248, float undef, i32 3
-  %250 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %249, i32 18, i32 2, i32 1)
-  %251 = extractelement <4 x float> %250, i32 0
-  %252 = extractelement <4 x float> %250, i32 1
-  %253 = extractelement <4 x float> %250, i32 2
-  %254 = extractelement <4 x float> %250, i32 3
-  %255 = fmul float %251, %216
-  %256 = fmul float %252, %221
-  %257 = fmul float %253, %226
-  %258 = fmul float %254, 0.000000e+00
-  %259 = fadd float %202, 0x3FF4CCCCC0000000
-  %260 = fmul float %259, 0x3FE1C71C80000000
-  %261 = call float @llvm.AMDIL.clamp.(float %260, float 0.000000e+00, float 1.000000e+00)
-  %262 = fadd float %202, 0x3FF4CCCCC0000000
-  %263 = fmul float %262, 0x3FE1C71C80000000
-  %264 = call float @llvm.AMDIL.clamp.(float %263, float 0.000000e+00, float 1.000000e+00)
-  %265 = fadd float %202, 2.000000e+00
-  %266 = fmul float %265, 0x3FD611A7A0000000
-  %267 = call float @llvm.AMDIL.clamp.(float %266, float 0.000000e+00, float 1.000000e+00)
-  %268 = fmul float 2.000000e+00, %261
-  %269 = fsub float -0.000000e+00, %268
-  %270 = fadd float 3.000000e+00, %269
-  %271 = fmul float %261, %270
-  %272 = fmul float %261, %271
-  %273 = fmul float 2.000000e+00, %264
-  %274 = fsub float -0.000000e+00, %273
-  %275 = fadd float 3.000000e+00, %274
-  %276 = fmul float %264, %275
-  %277 = fmul float %264, %276
-  %278 = fmul float 2.000000e+00, %267
-  %279 = fsub float -0.000000e+00, %278
-  %280 = fadd float 3.000000e+00, %279
-  %281 = fmul float %267, %280
-  %282 = fmul float %267, %281
-  %283 = fmul float %26, 0x3F22DFD6A0000000
-  %284 = fmul float %27, 0x3F22DFD6A0000000
-  %285 = insertelement <4 x float> undef, float %283, i32 0
-  %286 = insertelement <4 x float> %285, float %284, i32 1
-  %287 = insertelement <4 x float> %286, float 0.000000e+00, i32 2
-  %288 = insertelement <4 x float> %287, float 0.000000e+00, i32 3
-  %289 = extractelement <4 x float> %288, i32 0
-  %290 = extractelement <4 x float> %288, i32 1
-  %291 = insertelement <4 x float> undef, float %289, i32 0
-  %292 = insertelement <4 x float> %291, float %290, i32 1
-  %293 = insertelement <4 x float> %292, float undef, i32 2
-  %294 = insertelement <4 x float> %293, float undef, i32 3
-  %295 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %294, i32 19, i32 3, i32 2)
-  %296 = extractelement <4 x float> %295, i32 0
-  %297 = extractelement <4 x float> %295, i32 1
-  %298 = extractelement <4 x float> %295, i32 2
-  %299 = extractelement <4 x float> %295, i32 3
-  %300 = fmul float %296, %272
-  %301 = fmul float %297, %277
-  %302 = fmul float %298, %282
-  %303 = fmul float %299, 0.000000e+00
-  %304 = fmul float %temp68.1, %37
-  %305 = fmul float %temp68.1, %38
-  %306 = fmul float %temp68.1, %39
-  %307 = fmul float %temp69.0, %40
-  %308 = fadd float %307, %304
-  %309 = fmul float %temp69.0, %41
-  %310 = fadd float %309, %305
-  %311 = fmul float %temp69.0, %42
-  %312 = fadd float %311, %306
-  %313 = fmul float %temp70.0, %34
-  %314 = fadd float %313, %308
-  %315 = fmul float %temp70.0, %35
-  %316 = fadd float %315, %310
-  %317 = fmul float %temp70.0, %36
-  %318 = fadd float %317, %312
-  %319 = insertelement <4 x float> undef, float %314, i32 0
-  %320 = insertelement <4 x float> %319, float %316, i32 1
-  %321 = insertelement <4 x float> %320, float %318, i32 2
-  %322 = insertelement <4 x float> %321, float 0.000000e+00, i32 3
-  %323 = insertelement <4 x float> undef, float %314, i32 0
-  %324 = insertelement <4 x float> %323, float %316, i32 1
-  %325 = insertelement <4 x float> %324, float %318, i32 2
-  %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3
-  %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326)
-  %328 = call float @llvm.AMDGPU.rsq.clamped.f32(float %327)
-  %329 = fmul float %314, %328
-  %330 = fmul float %316, %328
-  %331 = fmul float %318, %328
-  %332 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %333 = extractelement <4 x float> %332, i32 0
-  %334 = fsub float -0.000000e+00, %333
-  %335 = fadd float 1.000000e+00, %334
-  %336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %337 = extractelement <4 x float> %336, i32 0
-  %338 = fsub float -0.000000e+00, %337
-  %339 = fadd float 1.000000e+00, %338
-  %340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %341 = extractelement <4 x float> %340, i32 0
-  %342 = fsub float -0.000000e+00, %341
-  %343 = fadd float 1.000000e+00, %342
-  %344 = fsub float -0.000000e+00, %335
-  %345 = fadd float %202, %344
-  %346 = fsub float -0.000000e+00, %339
-  %347 = fadd float %202, %346
-  %348 = fadd float %347, 0xBFE3333340000000
-  %349 = fsub float -0.000000e+00, %202
-  %350 = fsub float -0.000000e+00, %343
-  %351 = fadd float %349, %350
-  %352 = insertelement <4 x float> undef, float %43, i32 0
-  %353 = insertelement <4 x float> %352, float %44, i32 1
-  %354 = insertelement <4 x float> %353, float %45, i32 2
-  %355 = insertelement <4 x float> %354, float 0.000000e+00, i32 3
-  %356 = insertelement <4 x float> undef, float %43, i32 0
-  %357 = insertelement <4 x float> %356, float %44, i32 1
-  %358 = insertelement <4 x float> %357, float %45, i32 2
-  %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3
-  %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359)
-  %361 = call float @llvm.AMDGPU.rsq.clamped.f32(float %360)
-  %362 = fmul float %45, %361
-  %363 = call float @fabs(float %362)
-  %364 = fmul float %176, 0x3FECCCCCC0000000
-  %365 = fadd float %364, %363
-  %366 = fadd float %365, 0xBFEFAE1480000000
-  %367 = fmul float %366, 0xC023FFFFC0000000
-  %368 = call float @llvm.AMDIL.clamp.(float %367, float 0.000000e+00, float 1.000000e+00)
-  %369 = fsub float -0.000000e+00, %335
-  %370 = fadd float %202, %369
-  %371 = fadd float %370, 0x3FBEB851E0000000
-  %372 = fsub float -0.000000e+00, %339
-  %373 = fadd float %202, %372
-  %374 = fadd float %373, 0xBFE0A3D700000000
-  %375 = fsub float -0.000000e+00, %202
-  %376 = fsub float -0.000000e+00, %343
-  %377 = fadd float %375, %376
-  %378 = insertelement <4 x float> undef, float %43, i32 0
-  %379 = insertelement <4 x float> %378, float %44, i32 1
-  %380 = insertelement <4 x float> %379, float %45, i32 2
-  %381 = insertelement <4 x float> %380, float 0.000000e+00, i32 3
-  %382 = insertelement <4 x float> undef, float %43, i32 0
-  %383 = insertelement <4 x float> %382, float %44, i32 1
-  %384 = insertelement <4 x float> %383, float %45, i32 2
-  %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3
-  %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385)
-  %387 = call float @llvm.AMDGPU.rsq.clamped.f32(float %386)
-  %388 = fmul float %45, %387
-  %389 = call float @fabs(float %388)
-  %390 = fmul float %176, 0x3FF51EB860000000
-  %391 = fadd float %390, %389
-  %392 = fadd float %391, 0xBFEFAE1480000000
-  %393 = fmul float %392, 0xC0490001A0000000
-  %394 = call float @llvm.AMDIL.clamp.(float %393, float 0.000000e+00, float 1.000000e+00)
-  %395 = fmul float 2.000000e+00, %368
-  %396 = fsub float -0.000000e+00, %395
-  %397 = fadd float 3.000000e+00, %396
-  %398 = fmul float %368, %397
-  %399 = fmul float %368, %398
-  %400 = call float @llvm.AMDGPU.lrp(float %399, float %255, float %345)
-  %401 = call float @llvm.AMDGPU.lrp(float %399, float %256, float %348)
-  %402 = call float @llvm.AMDGPU.lrp(float %399, float %257, float %351)
-  %403 = call float @llvm.AMDGPU.lrp(float %399, float %258, float 0.000000e+00)
-  %404 = fmul float 2.000000e+00, %394
-  %405 = fsub float -0.000000e+00, %404
-  %406 = fadd float 3.000000e+00, %405
-  %407 = fmul float %394, %406
-  %408 = fmul float %394, %407
-  %409 = call float @llvm.AMDGPU.lrp(float %408, float %255, float %371)
-  %410 = call float @llvm.AMDGPU.lrp(float %408, float %256, float %374)
-  %411 = call float @llvm.AMDGPU.lrp(float %408, float %257, float %377)
-  %412 = call float @llvm.AMDGPU.lrp(float %408, float %258, float 0x3FD3333340000000)
-  %413 = fcmp oge float 2.200000e+03, %179
-  %414 = sext i1 %413 to i32
-  %415 = bitcast i32 %414 to float
-  %416 = bitcast float %415 to i32
-  %417 = icmp ne i32 %416, 0
-  br i1 %417, label %IF161, label %ENDIF160
+  %one.sub.a.i = fsub float 1.000000e+00, %201
+  %one.sub.ac.i = fmul float %one.sub.a.i, 0x3FA99999A0000000
+  %mul.i = fmul float %198, 0x3FA99999A0000000
+  %result.i = fadd float %mul.i, %one.sub.ac.i
+  %202 = fadd float %result.i, 0x3FF4CCCCC0000000
+  %203 = fmul float %202, 0x3FE1C71C80000000
+  %204 = call float @llvm.AMDIL.clamp.(float %203, float 0.000000e+00, float 1.000000e+00)
+  %205 = fadd float %result.i, 0x3FF4CCCCC0000000
+  %206 = fmul float %205, 0x3FE1C71C80000000
+  %207 = call float @llvm.AMDIL.clamp.(float %206, float 0.000000e+00, float 1.000000e+00)
+  %208 = fadd float %result.i, 2.000000e+00
+  %209 = fmul float %208, 0x3FD611A7A0000000
+  %210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00)
+  %211 = fmul float 2.000000e+00, %204
+  %212 = fsub float -0.000000e+00, %211
+  %213 = fadd float 3.000000e+00, %212
+  %214 = fmul float %204, %213
+  %215 = fmul float %204, %214
+  %216 = fmul float 2.000000e+00, %207
+  %217 = fsub float -0.000000e+00, %216
+  %218 = fadd float 3.000000e+00, %217
+  %219 = fmul float %207, %218
+  %220 = fmul float %207, %219
+  %221 = fmul float 2.000000e+00, %210
+  %222 = fsub float -0.000000e+00, %221
+  %223 = fadd float 3.000000e+00, %222
+  %224 = fmul float %210, %223
+  %225 = fmul float %210, %224
+  %226 = fmul float %26, 0x3F368B5CC0000000
+  %227 = fmul float %27, 0x3F368B5CC0000000
+  %228 = insertelement <4 x float> undef, float %226, i32 0
+  %229 = insertelement <4 x float> %228, float %227, i32 1
+  %230 = insertelement <4 x float> %229, float 0.000000e+00, i32 2
+  %231 = insertelement <4 x float> %230, float 0.000000e+00, i32 3
+  %232 = extractelement <4 x float> %231, i32 0
+  %233 = extractelement <4 x float> %231, i32 1
+  %234 = insertelement <4 x float> undef, float %232, i32 0
+  %235 = insertelement <4 x float> %234, float %233, i32 1
+  %236 = insertelement <4 x float> %235, float undef, i32 2
+  %237 = insertelement <4 x float> %236, float undef, i32 3
+  %238 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %237, i32 17, i32 1, i32 2)
+  %239 = extractelement <4 x float> %238, i32 0
+  %240 = insertelement <4 x float> undef, float %239, i32 0
+  %241 = insertelement <4 x float> %240, float %227, i32 1
+  %242 = insertelement <4 x float> %241, float 0.000000e+00, i32 2
+  %243 = insertelement <4 x float> %242, float 0.000000e+00, i32 3
+  %244 = extractelement <4 x float> %243, i32 0
+  %245 = insertelement <4 x float> undef, float %244, i32 0
+  %246 = insertelement <4 x float> %245, float undef, i32 1
+  %247 = insertelement <4 x float> %246, float undef, i32 2
+  %248 = insertelement <4 x float> %247, float undef, i32 3
+  %249 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %248, i32 18, i32 2, i32 1)
+  %250 = extractelement <4 x float> %249, i32 0
+  %251 = extractelement <4 x float> %249, i32 1
+  %252 = extractelement <4 x float> %249, i32 2
+  %253 = extractelement <4 x float> %249, i32 3
+  %254 = fmul float %250, %215
+  %255 = fmul float %251, %220
+  %256 = fmul float %252, %225
+  %257 = fmul float %253, 0.000000e+00
+  %258 = fadd float %result.i, 0x3FF4CCCCC0000000
+  %259 = fmul float %258, 0x3FE1C71C80000000
+  %260 = call float @llvm.AMDIL.clamp.(float %259, float 0.000000e+00, float 1.000000e+00)
+  %261 = fadd float %result.i, 0x3FF4CCCCC0000000
+  %262 = fmul float %261, 0x3FE1C71C80000000
+  %263 = call float @llvm.AMDIL.clamp.(float %262, float 0.000000e+00, float 1.000000e+00)
+  %264 = fadd float %result.i, 2.000000e+00
+  %265 = fmul float %264, 0x3FD611A7A0000000
+  %266 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00)
+  %267 = fmul float 2.000000e+00, %260
+  %268 = fsub float -0.000000e+00, %267
+  %269 = fadd float 3.000000e+00, %268
+  %270 = fmul float %260, %269
+  %271 = fmul float %260, %270
+  %272 = fmul float 2.000000e+00, %263
+  %273 = fsub float -0.000000e+00, %272
+  %274 = fadd float 3.000000e+00, %273
+  %275 = fmul float %263, %274
+  %276 = fmul float %263, %275
+  %277 = fmul float 2.000000e+00, %266
+  %278 = fsub float -0.000000e+00, %277
+  %279 = fadd float 3.000000e+00, %278
+  %280 = fmul float %266, %279
+  %281 = fmul float %266, %280
+  %282 = fmul float %26, 0x3F22DFD6A0000000
+  %283 = fmul float %27, 0x3F22DFD6A0000000
+  %284 = insertelement <4 x float> undef, float %282, i32 0
+  %285 = insertelement <4 x float> %284, float %283, i32 1
+  %286 = insertelement <4 x float> %285, float 0.000000e+00, i32 2
+  %287 = insertelement <4 x float> %286, float 0.000000e+00, i32 3
+  %288 = extractelement <4 x float> %287, i32 0
+  %289 = extractelement <4 x float> %287, i32 1
+  %290 = insertelement <4 x float> undef, float %288, i32 0
+  %291 = insertelement <4 x float> %290, float %289, i32 1
+  %292 = insertelement <4 x float> %291, float undef, i32 2
+  %293 = insertelement <4 x float> %292, float undef, i32 3
+  %294 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %293, i32 19, i32 3, i32 2)
+  %295 = extractelement <4 x float> %294, i32 0
+  %296 = extractelement <4 x float> %294, i32 1
+  %297 = extractelement <4 x float> %294, i32 2
+  %298 = extractelement <4 x float> %294, i32 3
+  %299 = fmul float %295, %271
+  %300 = fmul float %296, %276
+  %301 = fmul float %297, %281
+  %302 = fmul float %298, 0.000000e+00
+  %303 = fmul float %temp68.1, %37
+  %304 = fmul float %temp68.1, %38
+  %305 = fmul float %temp68.1, %39
+  %306 = fmul float %temp69.0, %40
+  %307 = fadd float %306, %303
+  %308 = fmul float %temp69.0, %41
+  %309 = fadd float %308, %304
+  %310 = fmul float %temp69.0, %42
+  %311 = fadd float %310, %305
+  %312 = fmul float %temp70.0, %34
+  %313 = fadd float %312, %307
+  %314 = fmul float %temp70.0, %35
+  %315 = fadd float %314, %309
+  %316 = fmul float %temp70.0, %36
+  %317 = fadd float %316, %311
+  %318 = insertelement <4 x float> undef, float %313, i32 0
+  %319 = insertelement <4 x float> %318, float %315, i32 1
+  %320 = insertelement <4 x float> %319, float %317, i32 2
+  %321 = insertelement <4 x float> %320, float 0.000000e+00, i32 3
+  %322 = insertelement <4 x float> undef, float %313, i32 0
+  %323 = insertelement <4 x float> %322, float %315, i32 1
+  %324 = insertelement <4 x float> %323, float %317, i32 2
+  %325 = insertelement <4 x float> %324, float 0.000000e+00, i32 3
+  %326 = call float @llvm.AMDGPU.dp4(<4 x float> %321, <4 x float> %325)
+  %327 = call float @llvm.AMDGPU.rsq.clamped.f32(float %326)
+  %328 = fmul float %313, %327
+  %329 = fmul float %315, %327
+  %330 = fmul float %317, %327
+  %331 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %332 = extractelement <4 x float> %331, i32 0
+  %333 = fsub float -0.000000e+00, %332
+  %334 = fadd float 1.000000e+00, %333
+  %335 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %336 = extractelement <4 x float> %335, i32 0
+  %337 = fsub float -0.000000e+00, %336
+  %338 = fadd float 1.000000e+00, %337
+  %339 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %340 = extractelement <4 x float> %339, i32 0
+  %341 = fsub float -0.000000e+00, %340
+  %342 = fadd float 1.000000e+00, %341
+  %343 = fsub float -0.000000e+00, %334
+  %344 = fadd float %result.i, %343
+  %345 = fsub float -0.000000e+00, %338
+  %346 = fadd float %result.i, %345
+  %347 = fadd float %346, 0xBFE3333340000000
+  %348 = fsub float -0.000000e+00, %result.i
+  %349 = fsub float -0.000000e+00, %342
+  %350 = fadd float %348, %349
+  %351 = insertelement <4 x float> undef, float %43, i32 0
+  %352 = insertelement <4 x float> %351, float %44, i32 1
+  %353 = insertelement <4 x float> %352, float %45, i32 2
+  %354 = insertelement <4 x float> %353, float 0.000000e+00, i32 3
+  %355 = insertelement <4 x float> undef, float %43, i32 0
+  %356 = insertelement <4 x float> %355, float %44, i32 1
+  %357 = insertelement <4 x float> %356, float %45, i32 2
+  %358 = insertelement <4 x float> %357, float 0.000000e+00, i32 3
+  %359 = call float @llvm.AMDGPU.dp4(<4 x float> %354, <4 x float> %358)
+  %360 = call float @llvm.AMDGPU.rsq.clamped.f32(float %359)
+  %361 = fmul float %45, %360
+  %362 = call float @fabs(float %361)
+  %363 = fmul float %176, 0x3FECCCCCC0000000
+  %364 = fadd float %363, %362
+  %365 = fadd float %364, 0xBFEFAE1480000000
+  %366 = fmul float %365, 0xC023FFFFC0000000
+  %367 = call float @llvm.AMDIL.clamp.(float %366, float 0.000000e+00, float 1.000000e+00)
+  %368 = fsub float -0.000000e+00, %334
+  %369 = fadd float %result.i, %368
+  %370 = fadd float %369, 0x3FBEB851E0000000
+  %371 = fsub float -0.000000e+00, %338
+  %372 = fadd float %result.i, %371
+  %373 = fadd float %372, 0xBFE0A3D700000000
+  %374 = fsub float -0.000000e+00, %result.i
+  %375 = fsub float -0.000000e+00, %342
+  %376 = fadd float %374, %375
+  %377 = insertelement <4 x float> undef, float %43, i32 0
+  %378 = insertelement <4 x float> %377, float %44, i32 1
+  %379 = insertelement <4 x float> %378, float %45, i32 2
+  %380 = insertelement <4 x float> %379, float 0.000000e+00, i32 3
+  %381 = insertelement <4 x float> undef, float %43, i32 0
+  %382 = insertelement <4 x float> %381, float %44, i32 1
+  %383 = insertelement <4 x float> %382, float %45, i32 2
+  %384 = insertelement <4 x float> %383, float 0.000000e+00, i32 3
+  %385 = call float @llvm.AMDGPU.dp4(<4 x float> %380, <4 x float> %384)
+  %386 = call float @llvm.AMDGPU.rsq.clamped.f32(float %385)
+  %387 = fmul float %45, %386
+  %388 = call float @fabs(float %387)
+  %389 = fmul float %176, 0x3FF51EB860000000
+  %390 = fadd float %389, %388
+  %391 = fadd float %390, 0xBFEFAE1480000000
+  %392 = fmul float %391, 0xC0490001A0000000
+  %393 = call float @llvm.AMDIL.clamp.(float %392, float 0.000000e+00, float 1.000000e+00)
+  %394 = fmul float 2.000000e+00, %367
+  %395 = fsub float -0.000000e+00, %394
+  %396 = fadd float 3.000000e+00, %395
+  %397 = fmul float %367, %396
+  %398 = fmul float %367, %397
+  %one.sub.a.i169 = fsub float 1.000000e+00, %398
+  %one.sub.ac.i170 = fmul float %one.sub.a.i169, %344
+  %mul.i171 = fmul float %254, %344
+  %result.i172 = fadd float %mul.i171, %one.sub.ac.i170
+  %one.sub.a.i165 = fsub float 1.000000e+00, %398
+  %one.sub.ac.i166 = fmul float %one.sub.a.i165, %347
+  %mul.i167 = fmul float %255, %347
+  %result.i168 = fadd float %mul.i167, %one.sub.ac.i166
+  %one.sub.a.i161 = fsub float 1.000000e+00, %398
+  %one.sub.ac.i162 = fmul float %one.sub.a.i161, %350
+  %mul.i163 = fmul float %256, %350
+  %result.i164 = fadd float %mul.i163, %one.sub.ac.i162
+  %one.sub.a.i157 = fsub float 1.000000e+00, %398
+  %one.sub.ac.i158 = fmul float %one.sub.a.i157, 0.000000e+00
+  %mul.i159 = fmul float %257, 0.000000e+00
+  %result.i160 = fadd float %mul.i159, %one.sub.ac.i158
+  %399 = fmul float 2.000000e+00, %393
+  %400 = fsub float -0.000000e+00, %399
+  %401 = fadd float 3.000000e+00, %400
+  %402 = fmul float %393, %401
+  %403 = fmul float %393, %402
+  %one.sub.a.i153 = fsub float 1.000000e+00, %403
+  %one.sub.ac.i154 = fmul float %one.sub.a.i153, %370
+  %mul.i155 = fmul float %254, %370
+  %result.i156 = fadd float %mul.i155, %one.sub.ac.i154
+  %one.sub.a.i149 = fsub float 1.000000e+00, %403
+  %one.sub.ac.i150 = fmul float %one.sub.a.i149, %373
+  %mul.i151 = fmul float %255, %373
+  %result.i152 = fadd float %mul.i151, %one.sub.ac.i150
+  %one.sub.a.i145 = fsub float 1.000000e+00, %403
+  %one.sub.ac.i146 = fmul float %one.sub.a.i145, %376
+  %mul.i147 = fmul float %256, %376
+  %result.i148 = fadd float %mul.i147, %one.sub.ac.i146
+  %one.sub.a.i141 = fsub float 1.000000e+00, %403
+  %one.sub.ac.i142 = fmul float %one.sub.a.i141, 0x3FD3333340000000
+  %mul.i143 = fmul float %257, 0x3FD3333340000000
+  %result.i144 = fadd float %mul.i143, %one.sub.ac.i142
+  %404 = fcmp oge float 2.200000e+03, %179
+  %405 = sext i1 %404 to i32
+  %406 = bitcast i32 %405 to float
+  %407 = bitcast float %406 to i32
+  %408 = icmp ne i32 %407, 0
+  br i1 %408, label %IF161, label %ENDIF160
 
 LOOP:                                             ; preds = %ENDIF139, %IF137
-  %temp88.0 = phi float [ 0.000000e+00, %IF137 ], [ %446, %ENDIF139 ]
+  %temp88.0 = phi float [ 0.000000e+00, %IF137 ], [ %437, %ENDIF139 ]
   %temp92.0 = phi float [ 1.000000e+00, %IF137 ], [ %.temp92.0, %ENDIF139 ]
-  %temp96.0 = phi float [ 0.000000e+00, %IF137 ], [ %477, %ENDIF139 ]
-  %418 = bitcast float %temp96.0 to i32
-  %419 = icmp sge i32 %418, %137
-  %420 = sext i1 %419 to i32
-  %421 = bitcast i32 %420 to float
-  %422 = bitcast float %421 to i32
-  %423 = icmp ne i32 %422, 0
-  br i1 %423, label %IF140, label %ENDIF139
+  %temp96.0 = phi float [ 0.000000e+00, %IF137 ], [ %468, %ENDIF139 ]
+  %409 = bitcast float %temp96.0 to i32
+  %410 = icmp sge i32 %409, %137
+  %411 = sext i1 %410 to i32
+  %412 = bitcast i32 %411 to float
+  %413 = bitcast float %412 to i32
+  %414 = icmp ne i32 %413, 0
+  br i1 %414, label %IF140, label %ENDIF139
 
 IF140:                                            ; preds = %LOOP
-  %424 = fmul float %133, 5.000000e-01
-  %425 = fmul float %129, %temp92.0
-  %426 = fadd float %425, %22
-  %427 = fmul float %130, %temp92.0
-  %428 = fadd float %427, %23
-  %429 = insertelement <4 x float> undef, float %426, i32 0
-  %430 = insertelement <4 x float> %429, float %428, i32 1
-  %431 = insertelement <4 x float> %430, float 0.000000e+00, i32 2
-  %432 = insertelement <4 x float> %431, float 0.000000e+00, i32 3
-  %433 = extractelement <4 x float> %432, i32 0
-  %434 = extractelement <4 x float> %432, i32 1
-  %435 = insertelement <4 x float> undef, float %433, i32 0
-  %436 = insertelement <4 x float> %435, float %434, i32 1
-  %437 = insertelement <4 x float> %436, float undef, i32 2
-  %438 = insertelement <4 x float> %437, float undef, i32 3
-  %439 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %438, i32 20, i32 4, i32 2)
-  %440 = extractelement <4 x float> %439, i32 3
-  %441 = fcmp oge float %temp92.0, %440
-  %442 = sext i1 %441 to i32
-  %443 = bitcast i32 %442 to float
-  %444 = bitcast float %443 to i32
-  %445 = icmp ne i32 %444, 0
-  br i1 %445, label %IF146, label %ENDIF145
+  %415 = fmul float %133, 5.000000e-01
+  %416 = fmul float %129, %temp92.0
+  %417 = fadd float %416, %22
+  %418 = fmul float %130, %temp92.0
+  %419 = fadd float %418, %23
+  %420 = insertelement <4 x float> undef, float %417, i32 0
+  %421 = insertelement <4 x float> %420, float %419, i32 1
+  %422 = insertelement <4 x float> %421, float 0.000000e+00, i32 2
+  %423 = insertelement <4 x float> %422, float 0.000000e+00, i32 3
+  %424 = extractelement <4 x float> %423, i32 0
+  %425 = extractelement <4 x float> %423, i32 1
+  %426 = insertelement <4 x float> undef, float %424, i32 0
+  %427 = insertelement <4 x float> %426, float %425, i32 1
+  %428 = insertelement <4 x float> %427, float undef, i32 2
+  %429 = insertelement <4 x float> %428, float undef, i32 3
+  %430 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %429, i32 20, i32 4, i32 2)
+  %431 = extractelement <4 x float> %430, i32 3
+  %432 = fcmp oge float %temp92.0, %431
+  %433 = sext i1 %432 to i32
+  %434 = bitcast i32 %433 to float
+  %435 = bitcast float %434 to i32
+  %436 = icmp ne i32 %435, 0
+  br i1 %436, label %IF146, label %ENDIF145
 
 ENDIF139:                                         ; preds = %LOOP
-  %446 = fadd float %temp88.0, %133
-  %447 = fmul float %129, %446
-  %448 = fadd float %447, %22
-  %449 = fmul float %130, %446
-  %450 = fadd float %449, %23
-  %451 = insertelement <4 x float> undef, float %448, i32 0
-  %452 = insertelement <4 x float> %451, float %450, i32 1
-  %453 = insertelement <4 x float> %452, float 0.000000e+00, i32 2
-  %454 = insertelement <4 x float> %453, float 0.000000e+00, i32 3
-  %455 = extractelement <4 x float> %454, i32 0
-  %456 = extractelement <4 x float> %454, i32 1
-  %457 = insertelement <4 x float> undef, float %455, i32 0
-  %458 = insertelement <4 x float> %457, float %456, i32 1
-  %459 = insertelement <4 x float> %458, float undef, i32 2
-  %460 = insertelement <4 x float> %459, float undef, i32 3
-  %461 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %460, i32 20, i32 4, i32 2)
-  %462 = extractelement <4 x float> %461, i32 3
-  %463 = fcmp olt float 0x3FEFDF3B60000000, %temp92.0
-  %464 = sext i1 %463 to i32
-  %465 = bitcast i32 %464 to float
-  %466 = fcmp oge float %446, %462
-  %467 = sext i1 %466 to i32
+  %437 = fadd float %temp88.0, %133
+  %438 = fmul float %129, %437
+  %439 = fadd float %438, %22
+  %440 = fmul float %130, %437
+  %441 = fadd float %440, %23
+  %442 = insertelement <4 x float> undef, float %439, i32 0
+  %443 = insertelement <4 x float> %442, float %441, i32 1
+  %444 = insertelement <4 x float> %443, float 0.000000e+00, i32 2
+  %445 = insertelement <4 x float> %444, float 0.000000e+00, i32 3
+  %446 = extractelement <4 x float> %445, i32 0
+  %447 = extractelement <4 x float> %445, i32 1
+  %448 = insertelement <4 x float> undef, float %446, i32 0
+  %449 = insertelement <4 x float> %448, float %447, i32 1
+  %450 = insertelement <4 x float> %449, float undef, i32 2
+  %451 = insertelement <4 x float> %450, float undef, i32 3
+  %452 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %451, i32 20, i32 4, i32 2)
+  %453 = extractelement <4 x float> %452, i32 3
+  %454 = fcmp olt float 0x3FEFDF3B60000000, %temp92.0
+  %455 = sext i1 %454 to i32
+  %456 = bitcast i32 %455 to float
+  %457 = fcmp oge float %437, %453
+  %458 = sext i1 %457 to i32
+  %459 = bitcast i32 %458 to float
+  %460 = bitcast float %456 to i32
+  %461 = bitcast float %459 to i32
+  %462 = and i32 %460, %461
+  %463 = bitcast i32 %462 to float
+  %464 = bitcast float %463 to i32
+  %465 = icmp ne i32 %464, 0
+  %.temp92.0 = select i1 %465, float %437, float %temp92.0
+  %466 = bitcast float %temp96.0 to i32
+  %467 = add i32 %466, 1
   %468 = bitcast i32 %467 to float
-  %469 = bitcast float %465 to i32
-  %470 = bitcast float %468 to i32
-  %471 = and i32 %469, %470
-  %472 = bitcast i32 %471 to float
-  %473 = bitcast float %472 to i32
-  %474 = icmp ne i32 %473, 0
-  %.temp92.0 = select i1 %474, float %446, float %temp92.0
-  %475 = bitcast float %temp96.0 to i32
-  %476 = add i32 %475, 1
-  %477 = bitcast i32 %476 to float
   br label %LOOP
 
 IF146:                                            ; preds = %IF140
-  %478 = fmul float 2.000000e+00, %424
-  %479 = fsub float -0.000000e+00, %478
-  %480 = fadd float %temp92.0, %479
+  %469 = fmul float 2.000000e+00, %415
+  %470 = fsub float -0.000000e+00, %469
+  %471 = fadd float %temp92.0, %470
   br label %ENDIF145
 
-ENDIF145:                                         ; preds = %IF140, %IF146
-  %temp88.1 = phi float [ %480, %IF146 ], [ %temp92.0, %IF140 ]
-  %481 = fadd float %temp88.1, %424
-  %482 = fmul float %424, 5.000000e-01
-  %483 = fmul float %129, %481
-  %484 = fadd float %483, %22
-  %485 = fmul float %130, %481
-  %486 = fadd float %485, %23
-  %487 = insertelement <4 x float> undef, float %484, i32 0
-  %488 = insertelement <4 x float> %487, float %486, i32 1
-  %489 = insertelement <4 x float> %488, float 0.000000e+00, i32 2
-  %490 = insertelement <4 x float> %489, float %440, i32 3
-  %491 = extractelement <4 x float> %490, i32 0
-  %492 = extractelement <4 x float> %490, i32 1
-  %493 = insertelement <4 x float> undef, float %491, i32 0
-  %494 = insertelement <4 x float> %493, float %492, i32 1
-  %495 = insertelement <4 x float> %494, float undef, i32 2
-  %496 = insertelement <4 x float> %495, float undef, i32 3
-  %497 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %496, i32 20, i32 4, i32 2)
-  %498 = extractelement <4 x float> %497, i32 3
-  %499 = fcmp oge float %481, %498
-  %500 = sext i1 %499 to i32
-  %501 = bitcast i32 %500 to float
-  %502 = bitcast float %501 to i32
-  %503 = icmp ne i32 %502, 0
-  br i1 %503, label %IF149, label %ENDIF148
+ENDIF145:                                         ; preds = %IF146, %IF140
+  %temp88.1 = phi float [ %471, %IF146 ], [ %temp92.0, %IF140 ]
+  %472 = fadd float %temp88.1, %415
+  %473 = fmul float %415, 5.000000e-01
+  %474 = fmul float %129, %472
+  %475 = fadd float %474, %22
+  %476 = fmul float %130, %472
+  %477 = fadd float %476, %23
+  %478 = insertelement <4 x float> undef, float %475, i32 0
+  %479 = insertelement <4 x float> %478, float %477, i32 1
+  %480 = insertelement <4 x float> %479, float 0.000000e+00, i32 2
+  %481 = insertelement <4 x float> %480, float %431, i32 3
+  %482 = extractelement <4 x float> %481, i32 0
+  %483 = extractelement <4 x float> %481, i32 1
+  %484 = insertelement <4 x float> undef, float %482, i32 0
+  %485 = insertelement <4 x float> %484, float %483, i32 1
+  %486 = insertelement <4 x float> %485, float undef, i32 2
+  %487 = insertelement <4 x float> %486, float undef, i32 3
+  %488 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %487, i32 20, i32 4, i32 2)
+  %489 = extractelement <4 x float> %488, i32 3
+  %490 = fcmp oge float %472, %489
+  %491 = sext i1 %490 to i32
+  %492 = bitcast i32 %491 to float
+  %493 = bitcast float %492 to i32
+  %494 = icmp ne i32 %493, 0
+  br i1 %494, label %IF149, label %ENDIF148
 
 IF149:                                            ; preds = %ENDIF145
-  %504 = fmul float 2.000000e+00, %482
-  %505 = fsub float -0.000000e+00, %504
-  %506 = fadd float %481, %505
+  %495 = fmul float 2.000000e+00, %473
+  %496 = fsub float -0.000000e+00, %495
+  %497 = fadd float %472, %496
   br label %ENDIF148
 
-ENDIF148:                                         ; preds = %ENDIF145, %IF149
-  %temp88.2 = phi float [ %506, %IF149 ], [ %481, %ENDIF145 ]
-  %temp92.2 = phi float [ %481, %IF149 ], [ %temp92.0, %ENDIF145 ]
-  %507 = fadd float %temp88.2, %482
-  %508 = fmul float %482, 5.000000e-01
-  %509 = fmul float %129, %507
-  %510 = fadd float %509, %22
-  %511 = fmul float %130, %507
-  %512 = fadd float %511, %23
-  %513 = insertelement <4 x float> undef, float %510, i32 0
-  %514 = insertelement <4 x float> %513, float %512, i32 1
-  %515 = insertelement <4 x float> %514, float 0.000000e+00, i32 2
-  %516 = insertelement <4 x float> %515, float %498, i32 3
-  %517 = extractelement <4 x float> %516, i32 0
-  %518 = extractelement <4 x float> %516, i32 1
-  %519 = insertelement <4 x float> undef, float %517, i32 0
-  %520 = insertelement <4 x float> %519, float %518, i32 1
-  %521 = insertelement <4 x float> %520, float undef, i32 2
-  %522 = insertelement <4 x float> %521, float undef, i32 3
-  %523 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %522, i32 20, i32 4, i32 2)
-  %524 = extractelement <4 x float> %523, i32 3
-  %525 = fcmp oge float %507, %524
-  %526 = sext i1 %525 to i32
-  %527 = bitcast i32 %526 to float
-  %528 = bitcast float %527 to i32
-  %529 = icmp ne i32 %528, 0
-  br i1 %529, label %IF152, label %ENDIF151
+ENDIF148:                                         ; preds = %IF149, %ENDIF145
+  %temp88.2 = phi float [ %497, %IF149 ], [ %472, %ENDIF145 ]
+  %temp92.2 = phi float [ %472, %IF149 ], [ %temp92.0, %ENDIF145 ]
+  %498 = fadd float %temp88.2, %473
+  %499 = fmul float %473, 5.000000e-01
+  %500 = fmul float %129, %498
+  %501 = fadd float %500, %22
+  %502 = fmul float %130, %498
+  %503 = fadd float %502, %23
+  %504 = insertelement <4 x float> undef, float %501, i32 0
+  %505 = insertelement <4 x float> %504, float %503, i32 1
+  %506 = insertelement <4 x float> %505, float 0.000000e+00, i32 2
+  %507 = insertelement <4 x float> %506, float %489, i32 3
+  %508 = extractelement <4 x float> %507, i32 0
+  %509 = extractelement <4 x float> %507, i32 1
+  %510 = insertelement <4 x float> undef, float %508, i32 0
+  %511 = insertelement <4 x float> %510, float %509, i32 1
+  %512 = insertelement <4 x float> %511, float undef, i32 2
+  %513 = insertelement <4 x float> %512, float undef, i32 3
+  %514 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %513, i32 20, i32 4, i32 2)
+  %515 = extractelement <4 x float> %514, i32 3
+  %516 = fcmp oge float %498, %515
+  %517 = sext i1 %516 to i32
+  %518 = bitcast i32 %517 to float
+  %519 = bitcast float %518 to i32
+  %520 = icmp ne i32 %519, 0
+  br i1 %520, label %IF152, label %ENDIF151
 
 IF152:                                            ; preds = %ENDIF148
-  %530 = fmul float 2.000000e+00, %508
-  %531 = fsub float -0.000000e+00, %530
-  %532 = fadd float %507, %531
+  %521 = fmul float 2.000000e+00, %499
+  %522 = fsub float -0.000000e+00, %521
+  %523 = fadd float %498, %522
   br label %ENDIF151
 
-ENDIF151:                                         ; preds = %ENDIF148, %IF152
-  %temp88.3 = phi float [ %532, %IF152 ], [ %507, %ENDIF148 ]
-  %temp92.3 = phi float [ %507, %IF152 ], [ %temp92.2, %ENDIF148 ]
-  %533 = fadd float %temp88.3, %508
-  %534 = fmul float %508, 5.000000e-01
-  %535 = fmul float %129, %533
-  %536 = fadd float %535, %22
-  %537 = fmul float %130, %533
-  %538 = fadd float %537, %23
-  %539 = insertelement <4 x float> undef, float %536, i32 0
-  %540 = insertelement <4 x float> %539, float %538, i32 1
-  %541 = insertelement <4 x float> %540, float 0.000000e+00, i32 2
-  %542 = insertelement <4 x float> %541, float %524, i32 3
-  %543 = extractelement <4 x float> %542, i32 0
-  %544 = extractelement <4 x float> %542, i32 1
-  %545 = insertelement <4 x float> undef, float %543, i32 0
-  %546 = insertelement <4 x float> %545, float %544, i32 1
-  %547 = insertelement <4 x float> %546, float undef, i32 2
-  %548 = insertelement <4 x float> %547, float undef, i32 3
-  %549 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %548, i32 20, i32 4, i32 2)
-  %550 = extractelement <4 x float> %549, i32 3
-  %551 = fcmp oge float %533, %550
-  %552 = sext i1 %551 to i32
-  %553 = bitcast i32 %552 to float
-  %554 = bitcast float %553 to i32
-  %555 = icmp ne i32 %554, 0
-  br i1 %555, label %IF155, label %ENDIF154
+ENDIF151:                                         ; preds = %IF152, %ENDIF148
+  %temp88.3 = phi float [ %523, %IF152 ], [ %498, %ENDIF148 ]
+  %temp92.3 = phi float [ %498, %IF152 ], [ %temp92.2, %ENDIF148 ]
+  %524 = fadd float %temp88.3, %499
+  %525 = fmul float %499, 5.000000e-01
+  %526 = fmul float %129, %524
+  %527 = fadd float %526, %22
+  %528 = fmul float %130, %524
+  %529 = fadd float %528, %23
+  %530 = insertelement <4 x float> undef, float %527, i32 0
+  %531 = insertelement <4 x float> %530, float %529, i32 1
+  %532 = insertelement <4 x float> %531, float 0.000000e+00, i32 2
+  %533 = insertelement <4 x float> %532, float %515, i32 3
+  %534 = extractelement <4 x float> %533, i32 0
+  %535 = extractelement <4 x float> %533, i32 1
+  %536 = insertelement <4 x float> undef, float %534, i32 0
+  %537 = insertelement <4 x float> %536, float %535, i32 1
+  %538 = insertelement <4 x float> %537, float undef, i32 2
+  %539 = insertelement <4 x float> %538, float undef, i32 3
+  %540 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %539, i32 20, i32 4, i32 2)
+  %541 = extractelement <4 x float> %540, i32 3
+  %542 = fcmp oge float %524, %541
+  %543 = sext i1 %542 to i32
+  %544 = bitcast i32 %543 to float
+  %545 = bitcast float %544 to i32
+  %546 = icmp ne i32 %545, 0
+  br i1 %546, label %IF155, label %ENDIF154
 
 IF155:                                            ; preds = %ENDIF151
-  %556 = fmul float 2.000000e+00, %534
-  %557 = fsub float -0.000000e+00, %556
-  %558 = fadd float %533, %557
+  %547 = fmul float 2.000000e+00, %525
+  %548 = fsub float -0.000000e+00, %547
+  %549 = fadd float %524, %548
   br label %ENDIF154
 
-ENDIF154:                                         ; preds = %ENDIF151, %IF155
-  %temp88.4 = phi float [ %558, %IF155 ], [ %533, %ENDIF151 ]
-  %temp92.4 = phi float [ %533, %IF155 ], [ %temp92.3, %ENDIF151 ]
-  %559 = fadd float %temp88.4, %534
-  %560 = fmul float %129, %559
-  %561 = fadd float %560, %22
-  %562 = fmul float %130, %559
-  %563 = fadd float %562, %23
-  %564 = insertelement <4 x float> undef, float %561, i32 0
-  %565 = insertelement <4 x float> %564, float %563, i32 1
-  %566 = insertelement <4 x float> %565, float 0.000000e+00, i32 2
-  %567 = insertelement <4 x float> %566, float %550, i32 3
-  %568 = extractelement <4 x float> %567, i32 0
-  %569 = extractelement <4 x float> %567, i32 1
-  %570 = insertelement <4 x float> undef, float %568, i32 0
-  %571 = insertelement <4 x float> %570, float %569, i32 1
-  %572 = insertelement <4 x float> %571, float undef, i32 2
-  %573 = insertelement <4 x float> %572, float undef, i32 3
-  %574 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %573, i32 20, i32 4, i32 2)
-  %575 = extractelement <4 x float> %574, i32 3
-  %576 = fcmp oge float %559, %575
-  %577 = sext i1 %576 to i32
-  %578 = bitcast i32 %577 to float
-  %579 = bitcast float %578 to i32
-  %580 = icmp ne i32 %579, 0
-  %.temp92.4 = select i1 %580, float %559, float %temp92.4
-  %581 = fmul float %129, %.temp92.4
-  %582 = fadd float %581, %22
-  %583 = fmul float %130, %.temp92.4
-  %584 = fadd float %583, %23
-  %585 = insertelement <4 x float> undef, float %582, i32 0
-  %586 = insertelement <4 x float> %585, float %584, i32 1
-  %587 = insertelement <4 x float> %586, float 0.000000e+00, i32 2
-  %588 = insertelement <4 x float> %587, float %575, i32 3
-  %589 = extractelement <4 x float> %588, i32 0
-  %590 = extractelement <4 x float> %588, i32 1
-  %591 = insertelement <4 x float> undef, float %589, i32 0
-  %592 = insertelement <4 x float> %591, float %590, i32 1
-  %593 = insertelement <4 x float> %592, float undef, i32 2
-  %594 = insertelement <4 x float> %593, float undef, i32 3
-  %595 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %594, i32 20, i32 4, i32 2)
-  %596 = extractelement <4 x float> %595, i32 0
-  %597 = extractelement <4 x float> %595, i32 1
-  %598 = extractelement <4 x float> %595, i32 2
-  %599 = fmul float %596, 2.000000e+00
-  %600 = fadd float %599, -1.000000e+00
-  %601 = fmul float %597, 2.000000e+00
-  %602 = fadd float %601, -1.000000e+00
-  %603 = fmul float %598, 2.000000e+00
-  %604 = fadd float %603, -1.000000e+00
+ENDIF154:                                         ; preds = %IF155, %ENDIF151
+  %temp88.4 = phi float [ %549, %IF155 ], [ %524, %ENDIF151 ]
+  %temp92.4 = phi float [ %524, %IF155 ], [ %temp92.3, %ENDIF151 ]
+  %550 = fadd float %temp88.4, %525
+  %551 = fmul float %129, %550
+  %552 = fadd float %551, %22
+  %553 = fmul float %130, %550
+  %554 = fadd float %553, %23
+  %555 = insertelement <4 x float> undef, float %552, i32 0
+  %556 = insertelement <4 x float> %555, float %554, i32 1
+  %557 = insertelement <4 x float> %556, float 0.000000e+00, i32 2
+  %558 = insertelement <4 x float> %557, float %541, i32 3
+  %559 = extractelement <4 x float> %558, i32 0
+  %560 = extractelement <4 x float> %558, i32 1
+  %561 = insertelement <4 x float> undef, float %559, i32 0
+  %562 = insertelement <4 x float> %561, float %560, i32 1
+  %563 = insertelement <4 x float> %562, float undef, i32 2
+  %564 = insertelement <4 x float> %563, float undef, i32 3
+  %565 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %564, i32 20, i32 4, i32 2)
+  %566 = extractelement <4 x float> %565, i32 3
+  %567 = fcmp oge float %550, %566
+  %568 = sext i1 %567 to i32
+  %569 = bitcast i32 %568 to float
+  %570 = bitcast float %569 to i32
+  %571 = icmp ne i32 %570, 0
+  %.temp92.4 = select i1 %571, float %550, float %temp92.4
+  %572 = fmul float %129, %.temp92.4
+  %573 = fadd float %572, %22
+  %574 = fmul float %130, %.temp92.4
+  %575 = fadd float %574, %23
+  %576 = insertelement <4 x float> undef, float %573, i32 0
+  %577 = insertelement <4 x float> %576, float %575, i32 1
+  %578 = insertelement <4 x float> %577, float 0.000000e+00, i32 2
+  %579 = insertelement <4 x float> %578, float %566, i32 3
+  %580 = extractelement <4 x float> %579, i32 0
+  %581 = extractelement <4 x float> %579, i32 1
+  %582 = insertelement <4 x float> undef, float %580, i32 0
+  %583 = insertelement <4 x float> %582, float %581, i32 1
+  %584 = insertelement <4 x float> %583, float undef, i32 2
+  %585 = insertelement <4 x float> %584, float undef, i32 3
+  %586 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %585, i32 20, i32 4, i32 2)
+  %587 = extractelement <4 x float> %586, i32 0
+  %588 = extractelement <4 x float> %586, i32 1
+  %589 = extractelement <4 x float> %586, i32 2
+  %590 = fmul float %587, 2.000000e+00
+  %591 = fadd float %590, -1.000000e+00
+  %592 = fmul float %588, 2.000000e+00
+  %593 = fadd float %592, -1.000000e+00
+  %594 = fmul float %589, 2.000000e+00
+  %595 = fadd float %594, -1.000000e+00
   br label %ENDIF136
 
 IF161:                                            ; preds = %ENDIF136
-  %605 = fmul float %202, 0x3FB99999A0000000
-  %606 = fcmp uge float 0x3FE4CCCCC0000000, %605
-  %607 = select i1 %606, float 0x3FE4CCCCC0000000, float %605
-  %608 = fcmp uge float %607, 5.000000e-01
-  %609 = select i1 %608, float 5.000000e-01, float %607
-  %610 = call float @llvm.AMDGPU.lrp(float %609, float %400, float %300)
-  %611 = call float @llvm.AMDGPU.lrp(float %609, float %401, float %301)
-  %612 = call float @llvm.AMDGPU.lrp(float %609, float %402, float %302)
-  %613 = call float @llvm.AMDGPU.lrp(float %609, float %403, float %303)
-  %614 = insertelement <4 x float> undef, float %329, i32 0
-  %615 = insertelement <4 x float> %614, float %330, i32 1
-  %616 = insertelement <4 x float> %615, float %331, i32 2
-  %617 = insertelement <4 x float> %616, float 0.000000e+00, i32 3
-  %618 = insertelement <4 x float> undef, float %63, i32 0
-  %619 = insertelement <4 x float> %618, float %65, i32 1
-  %620 = insertelement <4 x float> %619, float %67, i32 2
-  %621 = insertelement <4 x float> %620, float 0.000000e+00, i32 3
-  %622 = call float @llvm.AMDGPU.dp4(<4 x float> %617, <4 x float> %621)
-  %623 = fcmp uge float 0x3FE6666660000000, %622
-  %624 = select i1 %623, float 0x3FE6666660000000, float %622
-  %625 = fmul float %8, %624
-  %626 = fmul float %13, %624
-  %627 = fmul float %18, %624
-  %628 = insertelement <4 x float> undef, float %34, i32 0
-  %629 = insertelement <4 x float> %628, float %35, i32 1
-  %630 = insertelement <4 x float> %629, float %36, i32 2
-  %631 = insertelement <4 x float> %630, float 0.000000e+00, i32 3
-  %632 = insertelement <4 x float> undef, float %63, i32 0
-  %633 = insertelement <4 x float> %632, float %65, i32 1
-  %634 = insertelement <4 x float> %633, float %67, i32 2
-  %635 = insertelement <4 x float> %634, float 0.000000e+00, i32 3
-  %636 = call float @llvm.AMDGPU.dp4(<4 x float> %631, <4 x float> %635)
-  %637 = fcmp uge float 0x3FECCCCCC0000000, %636
-  %638 = select i1 %637, float 0x3FECCCCCC0000000, float %636
-  %639 = fmul float %625, %638
-  %640 = fmul float %626, %638
-  %641 = fmul float %627, %638
+  %596 = fmul float %result.i, 0x3FB99999A0000000
+  %597 = fcmp uge float 0x3FE4CCCCC0000000, %596
+  %598 = select i1 %597, float 0x3FE4CCCCC0000000, float %596
+  %599 = fcmp uge float %598, 5.000000e-01
+  %600 = select i1 %599, float 5.000000e-01, float %598
+  %one.sub.a.i137 = fsub float 1.000000e+00, %600
+  %one.sub.ac.i138 = fmul float %one.sub.a.i137, %299
+  %mul.i139 = fmul float %result.i172, %299
+  %result.i140 = fadd float %mul.i139, %one.sub.ac.i138
+  %one.sub.a.i133 = fsub float 1.000000e+00, %600
+  %one.sub.ac.i134 = fmul float %one.sub.a.i133, %300
+  %mul.i135 = fmul float %result.i168, %300
+  %result.i136 = fadd float %mul.i135, %one.sub.ac.i134
+  %one.sub.a.i129 = fsub float 1.000000e+00, %600
+  %one.sub.ac.i130 = fmul float %one.sub.a.i129, %301
+  %mul.i131 = fmul float %result.i164, %301
+  %result.i132 = fadd float %mul.i131, %one.sub.ac.i130
+  %one.sub.a.i125 = fsub float 1.000000e+00, %600
+  %one.sub.ac.i126 = fmul float %one.sub.a.i125, %302
+  %mul.i127 = fmul float %result.i160, %302
+  %result.i128 = fadd float %mul.i127, %one.sub.ac.i126
+  %601 = insertelement <4 x float> undef, float %328, i32 0
+  %602 = insertelement <4 x float> %601, float %329, i32 1
+  %603 = insertelement <4 x float> %602, float %330, i32 2
+  %604 = insertelement <4 x float> %603, float 0.000000e+00, i32 3
+  %605 = insertelement <4 x float> undef, float %63, i32 0
+  %606 = insertelement <4 x float> %605, float %65, i32 1
+  %607 = insertelement <4 x float> %606, float %67, i32 2
+  %608 = insertelement <4 x float> %607, float 0.000000e+00, i32 3
+  %609 = call float @llvm.AMDGPU.dp4(<4 x float> %604, <4 x float> %608)
+  %610 = fcmp uge float 0x3FE6666660000000, %609
+  %611 = select i1 %610, float 0x3FE6666660000000, float %609
+  %612 = fmul float %8, %611
+  %613 = fmul float %13, %611
+  %614 = fmul float %18, %611
+  %615 = insertelement <4 x float> undef, float %34, i32 0
+  %616 = insertelement <4 x float> %615, float %35, i32 1
+  %617 = insertelement <4 x float> %616, float %36, i32 2
+  %618 = insertelement <4 x float> %617, float 0.000000e+00, i32 3
+  %619 = insertelement <4 x float> undef, float %63, i32 0
+  %620 = insertelement <4 x float> %619, float %65, i32 1
+  %621 = insertelement <4 x float> %620, float %67, i32 2
+  %622 = insertelement <4 x float> %621, float 0.000000e+00, i32 3
+  %623 = call float @llvm.AMDGPU.dp4(<4 x float> %618, <4 x float> %622)
+  %624 = fcmp uge float 0x3FECCCCCC0000000, %623
+  %625 = select i1 %624, float 0x3FECCCCCC0000000, float %623
+  %626 = fmul float %612, %625
+  %627 = fmul float %613, %625
+  %628 = fmul float %614, %625
   br label %ENDIF160
 
-ENDIF160:                                         ; preds = %ENDIF136, %IF161
-  %temp84.0 = phi float [ %610, %IF161 ], [ %255, %ENDIF136 ]
-  %temp85.0 = phi float [ %611, %IF161 ], [ %256, %ENDIF136 ]
-  %temp86.0 = phi float [ %612, %IF161 ], [ %257, %ENDIF136 ]
-  %temp87.0 = phi float [ %613, %IF161 ], [ %258, %ENDIF136 ]
-  %temp92.6 = phi float [ %639, %IF161 ], [ %415, %ENDIF136 ]
-  %temp93.0 = phi float [ %640, %IF161 ], [ 0.000000e+00, %ENDIF136 ]
-  %temp94.0 = phi float [ %641, %IF161 ], [ 0.000000e+00, %ENDIF136 ]
-  %642 = fcmp olt float 2.200000e+03, %179
-  %643 = sext i1 %642 to i32
-  %644 = bitcast i32 %643 to float
-  %645 = fcmp olt float %179, 2.300000e+03
-  %646 = sext i1 %645 to i32
-  %647 = bitcast i32 %646 to float
-  %648 = bitcast float %644 to i32
-  %649 = bitcast float %647 to i32
-  %650 = and i32 %648, %649
-  %651 = bitcast i32 %650 to float
-  %652 = bitcast float %651 to i32
-  %653 = icmp ne i32 %652, 0
-  br i1 %653, label %IF164, label %ENDIF163
+ENDIF160:                                         ; preds = %IF161, %ENDIF136
+  %temp84.0 = phi float [ %result.i140, %IF161 ], [ %254, %ENDIF136 ]
+  %temp85.0 = phi float [ %result.i136, %IF161 ], [ %255, %ENDIF136 ]
+  %temp86.0 = phi float [ %result.i132, %IF161 ], [ %256, %ENDIF136 ]
+  %temp87.0 = phi float [ %result.i128, %IF161 ], [ %257, %ENDIF136 ]
+  %temp92.6 = phi float [ %626, %IF161 ], [ %406, %ENDIF136 ]
+  %temp93.0 = phi float [ %627, %IF161 ], [ 0.000000e+00, %ENDIF136 ]
+  %temp94.0 = phi float [ %628, %IF161 ], [ 0.000000e+00, %ENDIF136 ]
+  %629 = fcmp olt float 2.200000e+03, %179
+  %630 = sext i1 %629 to i32
+  %631 = bitcast i32 %630 to float
+  %632 = fcmp olt float %179, 2.300000e+03
+  %633 = sext i1 %632 to i32
+  %634 = bitcast i32 %633 to float
+  %635 = bitcast float %631 to i32
+  %636 = bitcast float %634 to i32
+  %637 = and i32 %635, %636
+  %638 = bitcast i32 %637 to float
+  %639 = bitcast float %638 to i32
+  %640 = icmp ne i32 %639, 0
+  br i1 %640, label %IF164, label %ENDIF163
 
 IF164:                                            ; preds = %ENDIF160
-  %654 = fmul float %202, 5.000000e-01
-  %655 = fcmp uge float 0x3FE4CCCCC0000000, %654
-  %656 = select i1 %655, float 0x3FE4CCCCC0000000, float %654
-  %657 = fcmp uge float %656, 0x3FD6666660000000
-  %658 = select i1 %657, float 0x3FD6666660000000, float %656
-  %659 = call float @llvm.AMDGPU.lrp(float %658, float %400, float %300)
-  %660 = call float @llvm.AMDGPU.lrp(float %658, float %401, float %301)
-  %661 = call float @llvm.AMDGPU.lrp(float %658, float %402, float %302)
-  %662 = call float @llvm.AMDGPU.lrp(float %658, float %403, float %303)
-  %663 = insertelement <4 x float> undef, float %329, i32 0
-  %664 = insertelement <4 x float> %663, float %330, i32 1
-  %665 = insertelement <4 x float> %664, float %331, i32 2
-  %666 = insertelement <4 x float> %665, float 0.000000e+00, i32 3
-  %667 = insertelement <4 x float> undef, float %63, i32 0
-  %668 = insertelement <4 x float> %667, float %65, i32 1
-  %669 = insertelement <4 x float> %668, float %67, i32 2
-  %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 3
-  %671 = call float @llvm.AMDGPU.dp4(<4 x float> %666, <4 x float> %670)
-  %672 = fcmp uge float 0x3FE6666660000000, %671
-  %673 = select i1 %672, float 0x3FE6666660000000, float %671
-  %674 = fmul float %8, %673
-  %675 = fmul float %13, %673
-  %676 = fmul float %18, %673
-  %677 = insertelement <4 x float> undef, float %34, i32 0
-  %678 = insertelement <4 x float> %677, float %35, i32 1
-  %679 = insertelement <4 x float> %678, float %36, i32 2
-  %680 = insertelement <4 x float> %679, float 0.000000e+00, i32 3
-  %681 = insertelement <4 x float> undef, float %63, i32 0
-  %682 = insertelement <4 x float> %681, float %65, i32 1
-  %683 = insertelement <4 x float> %682, float %67, i32 2
-  %684 = insertelement <4 x float> %683, float 0.000000e+00, i32 3
-  %685 = call float @llvm.AMDGPU.dp4(<4 x float> %680, <4 x float> %684)
-  %686 = fcmp uge float 0x3FECCCCCC0000000, %685
-  %687 = select i1 %686, float 0x3FECCCCCC0000000, float %685
-  %688 = fmul float %674, %687
-  %689 = fmul float %675, %687
-  %690 = fmul float %676, %687
+  %641 = fmul float %result.i, 5.000000e-01
+  %642 = fcmp uge float 0x3FE4CCCCC0000000, %641
+  %643 = select i1 %642, float 0x3FE4CCCCC0000000, float %641
+  %644 = fcmp uge float %643, 0x3FD6666660000000
+  %645 = select i1 %644, float 0x3FD6666660000000, float %643
+  %one.sub.a.i121 = fsub float 1.000000e+00, %645
+  %one.sub.ac.i122 = fmul float %one.sub.a.i121, %299
+  %mul.i123 = fmul float %result.i172, %299
+  %result.i124 = fadd float %mul.i123, %one.sub.ac.i122
+  %one.sub.a.i117 = fsub float 1.000000e+00, %645
+  %one.sub.ac.i118 = fmul float %one.sub.a.i117, %300
+  %mul.i119 = fmul float %result.i168, %300
+  %result.i120 = fadd float %mul.i119, %one.sub.ac.i118
+  %one.sub.a.i113 = fsub float 1.000000e+00, %645
+  %one.sub.ac.i114 = fmul float %one.sub.a.i113, %301
+  %mul.i115 = fmul float %result.i164, %301
+  %result.i116 = fadd float %mul.i115, %one.sub.ac.i114
+  %one.sub.a.i109 = fsub float 1.000000e+00, %645
+  %one.sub.ac.i110 = fmul float %one.sub.a.i109, %302
+  %mul.i111 = fmul float %result.i160, %302
+  %result.i112 = fadd float %mul.i111, %one.sub.ac.i110
+  %646 = insertelement <4 x float> undef, float %328, i32 0
+  %647 = insertelement <4 x float> %646, float %329, i32 1
+  %648 = insertelement <4 x float> %647, float %330, i32 2
+  %649 = insertelement <4 x float> %648, float 0.000000e+00, i32 3
+  %650 = insertelement <4 x float> undef, float %63, i32 0
+  %651 = insertelement <4 x float> %650, float %65, i32 1
+  %652 = insertelement <4 x float> %651, float %67, i32 2
+  %653 = insertelement <4 x float> %652, float 0.000000e+00, i32 3
+  %654 = call float @llvm.AMDGPU.dp4(<4 x float> %649, <4 x float> %653)
+  %655 = fcmp uge float 0x3FE6666660000000, %654
+  %656 = select i1 %655, float 0x3FE6666660000000, float %654
+  %657 = fmul float %8, %656
+  %658 = fmul float %13, %656
+  %659 = fmul float %18, %656
+  %660 = insertelement <4 x float> undef, float %34, i32 0
+  %661 = insertelement <4 x float> %660, float %35, i32 1
+  %662 = insertelement <4 x float> %661, float %36, i32 2
+  %663 = insertelement <4 x float> %662, float 0.000000e+00, i32 3
+  %664 = insertelement <4 x float> undef, float %63, i32 0
+  %665 = insertelement <4 x float> %664, float %65, i32 1
+  %666 = insertelement <4 x float> %665, float %67, i32 2
+  %667 = insertelement <4 x float> %666, float 0.000000e+00, i32 3
+  %668 = call float @llvm.AMDGPU.dp4(<4 x float> %663, <4 x float> %667)
+  %669 = fcmp uge float 0x3FECCCCCC0000000, %668
+  %670 = select i1 %669, float 0x3FECCCCCC0000000, float %668
+  %671 = fmul float %657, %670
+  %672 = fmul float %658, %670
+  %673 = fmul float %659, %670
   br label %ENDIF163
 
-ENDIF163:                                         ; preds = %ENDIF160, %IF164
-  %temp84.1 = phi float [ %659, %IF164 ], [ %temp84.0, %ENDIF160 ]
-  %temp85.1 = phi float [ %660, %IF164 ], [ %temp85.0, %ENDIF160 ]
-  %temp86.1 = phi float [ %661, %IF164 ], [ %temp86.0, %ENDIF160 ]
-  %temp87.1 = phi float [ %662, %IF164 ], [ %temp87.0, %ENDIF160 ]
-  %temp92.7 = phi float [ %688, %IF164 ], [ %temp92.6, %ENDIF160 ]
-  %temp93.1 = phi float [ %689, %IF164 ], [ %temp93.0, %ENDIF160 ]
-  %temp94.1 = phi float [ %690, %IF164 ], [ %temp94.0, %ENDIF160 ]
-  %691 = fcmp oge float %179, 2.300000e+03
-  %692 = sext i1 %691 to i32
-  %693 = bitcast i32 %692 to float
-  %694 = fcmp olt float %179, 2.480000e+03
-  %695 = sext i1 %694 to i32
-  %696 = bitcast i32 %695 to float
-  %697 = bitcast float %693 to i32
-  %698 = bitcast float %696 to i32
-  %699 = and i32 %697, %698
-  %700 = bitcast i32 %699 to float
-  %701 = bitcast float %700 to i32
-  %702 = icmp ne i32 %701, 0
-  br i1 %702, label %IF167, label %ENDIF166
+ENDIF163:                                         ; preds = %IF164, %ENDIF160
+  %temp84.1 = phi float [ %result.i124, %IF164 ], [ %temp84.0, %ENDIF160 ]
+  %temp85.1 = phi float [ %result.i120, %IF164 ], [ %temp85.0, %ENDIF160 ]
+  %temp86.1 = phi float [ %result.i116, %IF164 ], [ %temp86.0, %ENDIF160 ]
+  %temp87.1 = phi float [ %result.i112, %IF164 ], [ %temp87.0, %ENDIF160 ]
+  %temp92.7 = phi float [ %671, %IF164 ], [ %temp92.6, %ENDIF160 ]
+  %temp93.1 = phi float [ %672, %IF164 ], [ %temp93.0, %ENDIF160 ]
+  %temp94.1 = phi float [ %673, %IF164 ], [ %temp94.0, %ENDIF160 ]
+  %674 = fcmp oge float %179, 2.300000e+03
+  %675 = sext i1 %674 to i32
+  %676 = bitcast i32 %675 to float
+  %677 = fcmp olt float %179, 2.480000e+03
+  %678 = sext i1 %677 to i32
+  %679 = bitcast i32 %678 to float
+  %680 = bitcast float %676 to i32
+  %681 = bitcast float %679 to i32
+  %682 = and i32 %680, %681
+  %683 = bitcast i32 %682 to float
+  %684 = bitcast float %683 to i32
+  %685 = icmp ne i32 %684, 0
+  br i1 %685, label %IF167, label %ENDIF166
 
 IF167:                                            ; preds = %ENDIF163
-  %703 = fmul float %202, 5.000000e-01
-  %704 = fcmp uge float 0x3FE4CCCCC0000000, %703
-  %705 = select i1 %704, float 0x3FE4CCCCC0000000, float %703
-  %706 = fcmp uge float %705, 0x3FD3333340000000
-  %707 = select i1 %706, float 0x3FD3333340000000, float %705
-  %708 = call float @llvm.AMDGPU.lrp(float %707, float %409, float %300)
-  %709 = call float @llvm.AMDGPU.lrp(float %707, float %410, float %301)
-  %710 = call float @llvm.AMDGPU.lrp(float %707, float %411, float %302)
-  %711 = call float @llvm.AMDGPU.lrp(float %707, float %412, float %303)
-  %712 = insertelement <4 x float> undef, float %329, i32 0
-  %713 = insertelement <4 x float> %712, float %330, i32 1
-  %714 = insertelement <4 x float> %713, float %331, i32 2
-  %715 = insertelement <4 x float> %714, float 0.000000e+00, i32 3
-  %716 = insertelement <4 x float> undef, float %63, i32 0
-  %717 = insertelement <4 x float> %716, float %65, i32 1
-  %718 = insertelement <4 x float> %717, float %67, i32 2
-  %719 = insertelement <4 x float> %718, float 0.000000e+00, i32 3
-  %720 = call float @llvm.AMDGPU.dp4(<4 x float> %715, <4 x float> %719)
-  %721 = fcmp uge float 0x3FEB333340000000, %720
-  %722 = select i1 %721, float 0x3FEB333340000000, float %720
-  %723 = fmul float %8, %722
-  %724 = fmul float %13, %722
-  %725 = fmul float %18, %722
-  %726 = insertelement <4 x float> undef, float %34, i32 0
-  %727 = insertelement <4 x float> %726, float %35, i32 1
-  %728 = insertelement <4 x float> %727, float %36, i32 2
-  %729 = insertelement <4 x float> %728, float 0.000000e+00, i32 3
-  %730 = insertelement <4 x float> undef, float %63, i32 0
-  %731 = insertelement <4 x float> %730, float %65, i32 1
-  %732 = insertelement <4 x float> %731, float %67, i32 2
-  %733 = insertelement <4 x float> %732, float 0.000000e+00, i32 3
-  %734 = call float @llvm.AMDGPU.dp4(<4 x float> %729, <4 x float> %733)
-  %735 = fcmp uge float 0x3FECCCCCC0000000, %734
-  %736 = select i1 %735, float 0x3FECCCCCC0000000, float %734
-  %737 = fmul float %723, %736
-  %738 = fmul float %724, %736
-  %739 = fmul float %725, %736
+  %686 = fmul float %result.i, 5.000000e-01
+  %687 = fcmp uge float 0x3FE4CCCCC0000000, %686
+  %688 = select i1 %687, float 0x3FE4CCCCC0000000, float %686
+  %689 = fcmp uge float %688, 0x3FD3333340000000
+  %690 = select i1 %689, float 0x3FD3333340000000, float %688
+  %one.sub.a.i105 = fsub float 1.000000e+00, %690
+  %one.sub.ac.i106 = fmul float %one.sub.a.i105, %299
+  %mul.i107 = fmul float %result.i156, %299
+  %result.i108 = fadd float %mul.i107, %one.sub.ac.i106
+  %one.sub.a.i101 = fsub float 1.000000e+00, %690
+  %one.sub.ac.i102 = fmul float %one.sub.a.i101, %300
+  %mul.i103 = fmul float %result.i152, %300
+  %result.i104 = fadd float %mul.i103, %one.sub.ac.i102
+  %one.sub.a.i97 = fsub float 1.000000e+00, %690
+  %one.sub.ac.i98 = fmul float %one.sub.a.i97, %301
+  %mul.i99 = fmul float %result.i148, %301
+  %result.i100 = fadd float %mul.i99, %one.sub.ac.i98
+  %one.sub.a.i93 = fsub float 1.000000e+00, %690
+  %one.sub.ac.i94 = fmul float %one.sub.a.i93, %302
+  %mul.i95 = fmul float %result.i144, %302
+  %result.i96 = fadd float %mul.i95, %one.sub.ac.i94
+  %691 = insertelement <4 x float> undef, float %328, i32 0
+  %692 = insertelement <4 x float> %691, float %329, i32 1
+  %693 = insertelement <4 x float> %692, float %330, i32 2
+  %694 = insertelement <4 x float> %693, float 0.000000e+00, i32 3
+  %695 = insertelement <4 x float> undef, float %63, i32 0
+  %696 = insertelement <4 x float> %695, float %65, i32 1
+  %697 = insertelement <4 x float> %696, float %67, i32 2
+  %698 = insertelement <4 x float> %697, float 0.000000e+00, i32 3
+  %699 = call float @llvm.AMDGPU.dp4(<4 x float> %694, <4 x float> %698)
+  %700 = fcmp uge float 0x3FEB333340000000, %699
+  %701 = select i1 %700, float 0x3FEB333340000000, float %699
+  %702 = fmul float %8, %701
+  %703 = fmul float %13, %701
+  %704 = fmul float %18, %701
+  %705 = insertelement <4 x float> undef, float %34, i32 0
+  %706 = insertelement <4 x float> %705, float %35, i32 1
+  %707 = insertelement <4 x float> %706, float %36, i32 2
+  %708 = insertelement <4 x float> %707, float 0.000000e+00, i32 3
+  %709 = insertelement <4 x float> undef, float %63, i32 0
+  %710 = insertelement <4 x float> %709, float %65, i32 1
+  %711 = insertelement <4 x float> %710, float %67, i32 2
+  %712 = insertelement <4 x float> %711, float 0.000000e+00, i32 3
+  %713 = call float @llvm.AMDGPU.dp4(<4 x float> %708, <4 x float> %712)
+  %714 = fcmp uge float 0x3FECCCCCC0000000, %713
+  %715 = select i1 %714, float 0x3FECCCCCC0000000, float %713
+  %716 = fmul float %702, %715
+  %717 = fmul float %703, %715
+  %718 = fmul float %704, %715
   br label %ENDIF166
 
-ENDIF166:                                         ; preds = %ENDIF163, %IF167
-  %temp84.2 = phi float [ %708, %IF167 ], [ %temp84.1, %ENDIF163 ]
-  %temp85.2 = phi float [ %709, %IF167 ], [ %temp85.1, %ENDIF163 ]
-  %temp86.2 = phi float [ %710, %IF167 ], [ %temp86.1, %ENDIF163 ]
-  %temp87.2 = phi float [ %711, %IF167 ], [ %temp87.1, %ENDIF163 ]
-  %temp92.8 = phi float [ %737, %IF167 ], [ %temp92.7, %ENDIF163 ]
-  %temp93.2 = phi float [ %738, %IF167 ], [ %temp93.1, %ENDIF163 ]
-  %temp94.2 = phi float [ %739, %IF167 ], [ %temp94.1, %ENDIF163 ]
-  %740 = fcmp oge float %179, 2.480000e+03
-  %741 = sext i1 %740 to i32
-  %742 = bitcast i32 %741 to float
-  %743 = fcmp olt float %179, 2.530000e+03
-  %744 = sext i1 %743 to i32
-  %745 = bitcast i32 %744 to float
-  %746 = bitcast float %742 to i32
-  %747 = bitcast float %745 to i32
-  %748 = and i32 %746, %747
-  %749 = bitcast i32 %748 to float
-  %750 = bitcast float %749 to i32
-  %751 = icmp ne i32 %750, 0
-  br i1 %751, label %IF170, label %ENDIF169
+ENDIF166:                                         ; preds = %IF167, %ENDIF163
+  %temp84.2 = phi float [ %result.i108, %IF167 ], [ %temp84.1, %ENDIF163 ]
+  %temp85.2 = phi float [ %result.i104, %IF167 ], [ %temp85.1, %ENDIF163 ]
+  %temp86.2 = phi float [ %result.i100, %IF167 ], [ %temp86.1, %ENDIF163 ]
+  %temp87.2 = phi float [ %result.i96, %IF167 ], [ %temp87.1, %ENDIF163 ]
+  %temp92.8 = phi float [ %716, %IF167 ], [ %temp92.7, %ENDIF163 ]
+  %temp93.2 = phi float [ %717, %IF167 ], [ %temp93.1, %ENDIF163 ]
+  %temp94.2 = phi float [ %718, %IF167 ], [ %temp94.1, %ENDIF163 ]
+  %719 = fcmp oge float %179, 2.480000e+03
+  %720 = sext i1 %719 to i32
+  %721 = bitcast i32 %720 to float
+  %722 = fcmp olt float %179, 2.530000e+03
+  %723 = sext i1 %722 to i32
+  %724 = bitcast i32 %723 to float
+  %725 = bitcast float %721 to i32
+  %726 = bitcast float %724 to i32
+  %727 = and i32 %725, %726
+  %728 = bitcast i32 %727 to float
+  %729 = bitcast float %728 to i32
+  %730 = icmp ne i32 %729, 0
+  br i1 %730, label %IF170, label %ENDIF169
 
 IF170:                                            ; preds = %ENDIF166
-  %752 = fmul float %202, 5.000000e-01
-  %753 = fcmp uge float 0x3FE4CCCCC0000000, %752
-  %754 = select i1 %753, float 0x3FE4CCCCC0000000, float %752
-  %755 = fcmp uge float %754, 0x3FC99999A0000000
-  %756 = select i1 %755, float 0x3FC99999A0000000, float %754
-  %757 = call float @llvm.AMDGPU.lrp(float %756, float %409, float %300)
-  %758 = call float @llvm.AMDGPU.lrp(float %756, float %410, float %301)
-  %759 = call float @llvm.AMDGPU.lrp(float %756, float %411, float %302)
-  %760 = call float @llvm.AMDGPU.lrp(float %756, float %412, float %303)
-  %761 = insertelement <4 x float> undef, float %329, i32 0
-  %762 = insertelement <4 x float> %761, float %330, i32 1
-  %763 = insertelement <4 x float> %762, float %331, i32 2
-  %764 = insertelement <4 x float> %763, float 0.000000e+00, i32 3
-  %765 = insertelement <4 x float> undef, float %63, i32 0
-  %766 = insertelement <4 x float> %765, float %65, i32 1
-  %767 = insertelement <4 x float> %766, float %67, i32 2
-  %768 = insertelement <4 x float> %767, float 0.000000e+00, i32 3
-  %769 = call float @llvm.AMDGPU.dp4(<4 x float> %764, <4 x float> %768)
-  %770 = fcmp uge float 0x3FEB333340000000, %769
-  %771 = select i1 %770, float 0x3FEB333340000000, float %769
-  %772 = fmul float %8, %771
-  %773 = fmul float %13, %771
-  %774 = fmul float %18, %771
-  %775 = insertelement <4 x float> undef, float %34, i32 0
-  %776 = insertelement <4 x float> %775, float %35, i32 1
-  %777 = insertelement <4 x float> %776, float %36, i32 2
-  %778 = insertelement <4 x float> %777, float 0.000000e+00, i32 3
-  %779 = insertelement <4 x float> undef, float %63, i32 0
-  %780 = insertelement <4 x float> %779, float %65, i32 1
-  %781 = insertelement <4 x float> %780, float %67, i32 2
-  %782 = insertelement <4 x float> %781, float 0.000000e+00, i32 3
-  %783 = call float @llvm.AMDGPU.dp4(<4 x float> %778, <4 x float> %782)
-  %784 = fcmp uge float 0x3FECCCCCC0000000, %783
-  %785 = select i1 %784, float 0x3FECCCCCC0000000, float %783
-  %786 = fmul float %772, %785
-  %787 = fmul float %773, %785
-  %788 = fmul float %774, %785
+  %731 = fmul float %result.i, 5.000000e-01
+  %732 = fcmp uge float 0x3FE4CCCCC0000000, %731
+  %733 = select i1 %732, float 0x3FE4CCCCC0000000, float %731
+  %734 = fcmp uge float %733, 0x3FC99999A0000000
+  %735 = select i1 %734, float 0x3FC99999A0000000, float %733
+  %one.sub.a.i89 = fsub float 1.000000e+00, %735
+  %one.sub.ac.i90 = fmul float %one.sub.a.i89, %299
+  %mul.i91 = fmul float %result.i156, %299
+  %result.i92 = fadd float %mul.i91, %one.sub.ac.i90
+  %one.sub.a.i85 = fsub float 1.000000e+00, %735
+  %one.sub.ac.i86 = fmul float %one.sub.a.i85, %300
+  %mul.i87 = fmul float %result.i152, %300
+  %result.i88 = fadd float %mul.i87, %one.sub.ac.i86
+  %one.sub.a.i81 = fsub float 1.000000e+00, %735
+  %one.sub.ac.i82 = fmul float %one.sub.a.i81, %301
+  %mul.i83 = fmul float %result.i148, %301
+  %result.i84 = fadd float %mul.i83, %one.sub.ac.i82
+  %one.sub.a.i77 = fsub float 1.000000e+00, %735
+  %one.sub.ac.i78 = fmul float %one.sub.a.i77, %302
+  %mul.i79 = fmul float %result.i144, %302
+  %result.i80 = fadd float %mul.i79, %one.sub.ac.i78
+  %736 = insertelement <4 x float> undef, float %328, i32 0
+  %737 = insertelement <4 x float> %736, float %329, i32 1
+  %738 = insertelement <4 x float> %737, float %330, i32 2
+  %739 = insertelement <4 x float> %738, float 0.000000e+00, i32 3
+  %740 = insertelement <4 x float> undef, float %63, i32 0
+  %741 = insertelement <4 x float> %740, float %65, i32 1
+  %742 = insertelement <4 x float> %741, float %67, i32 2
+  %743 = insertelement <4 x float> %742, float 0.000000e+00, i32 3
+  %744 = call float @llvm.AMDGPU.dp4(<4 x float> %739, <4 x float> %743)
+  %745 = fcmp uge float 0x3FEB333340000000, %744
+  %746 = select i1 %745, float 0x3FEB333340000000, float %744
+  %747 = fmul float %8, %746
+  %748 = fmul float %13, %746
+  %749 = fmul float %18, %746
+  %750 = insertelement <4 x float> undef, float %34, i32 0
+  %751 = insertelement <4 x float> %750, float %35, i32 1
+  %752 = insertelement <4 x float> %751, float %36, i32 2
+  %753 = insertelement <4 x float> %752, float 0.000000e+00, i32 3
+  %754 = insertelement <4 x float> undef, float %63, i32 0
+  %755 = insertelement <4 x float> %754, float %65, i32 1
+  %756 = insertelement <4 x float> %755, float %67, i32 2
+  %757 = insertelement <4 x float> %756, float 0.000000e+00, i32 3
+  %758 = call float @llvm.AMDGPU.dp4(<4 x float> %753, <4 x float> %757)
+  %759 = fcmp uge float 0x3FECCCCCC0000000, %758
+  %760 = select i1 %759, float 0x3FECCCCCC0000000, float %758
+  %761 = fmul float %747, %760
+  %762 = fmul float %748, %760
+  %763 = fmul float %749, %760
   br label %ENDIF169
 
-ENDIF169:                                         ; preds = %ENDIF166, %IF170
-  %temp84.3 = phi float [ %757, %IF170 ], [ %temp84.2, %ENDIF166 ]
-  %temp85.3 = phi float [ %758, %IF170 ], [ %temp85.2, %ENDIF166 ]
-  %temp86.3 = phi float [ %759, %IF170 ], [ %temp86.2, %ENDIF166 ]
-  %temp87.3 = phi float [ %760, %IF170 ], [ %temp87.2, %ENDIF166 ]
-  %temp92.9 = phi float [ %786, %IF170 ], [ %temp92.8, %ENDIF166 ]
-  %temp93.3 = phi float [ %787, %IF170 ], [ %temp93.2, %ENDIF166 ]
-  %temp94.3 = phi float [ %788, %IF170 ], [ %temp94.2, %ENDIF166 ]
-  %789 = fcmp oge float %179, 2.530000e+03
-  %790 = sext i1 %789 to i32
-  %791 = bitcast i32 %790 to float
-  %792 = fcmp olt float %179, 2.670000e+03
-  %793 = sext i1 %792 to i32
-  %794 = bitcast i32 %793 to float
-  %795 = bitcast float %791 to i32
-  %796 = bitcast float %794 to i32
-  %797 = and i32 %795, %796
-  %798 = bitcast i32 %797 to float
-  %799 = bitcast float %798 to i32
-  %800 = icmp ne i32 %799, 0
-  br i1 %800, label %IF173, label %ENDIF172
+ENDIF169:                                         ; preds = %IF170, %ENDIF166
+  %temp84.3 = phi float [ %result.i92, %IF170 ], [ %temp84.2, %ENDIF166 ]
+  %temp85.3 = phi float [ %result.i88, %IF170 ], [ %temp85.2, %ENDIF166 ]
+  %temp86.3 = phi float [ %result.i84, %IF170 ], [ %temp86.2, %ENDIF166 ]
+  %temp87.3 = phi float [ %result.i80, %IF170 ], [ %temp87.2, %ENDIF166 ]
+  %temp92.9 = phi float [ %761, %IF170 ], [ %temp92.8, %ENDIF166 ]
+  %temp93.3 = phi float [ %762, %IF170 ], [ %temp93.2, %ENDIF166 ]
+  %temp94.3 = phi float [ %763, %IF170 ], [ %temp94.2, %ENDIF166 ]
+  %764 = fcmp oge float %179, 2.530000e+03
+  %765 = sext i1 %764 to i32
+  %766 = bitcast i32 %765 to float
+  %767 = fcmp olt float %179, 2.670000e+03
+  %768 = sext i1 %767 to i32
+  %769 = bitcast i32 %768 to float
+  %770 = bitcast float %766 to i32
+  %771 = bitcast float %769 to i32
+  %772 = and i32 %770, %771
+  %773 = bitcast i32 %772 to float
+  %774 = bitcast float %773 to i32
+  %775 = icmp ne i32 %774, 0
+  br i1 %775, label %IF173, label %ENDIF172
 
 IF173:                                            ; preds = %ENDIF169
-  %801 = fmul float %202, 5.000000e-01
-  %802 = fcmp uge float 0x3FE4CCCCC0000000, %801
-  %803 = select i1 %802, float 0x3FE4CCCCC0000000, float %801
-  %804 = fcmp uge float %803, 0x3FB99999A0000000
-  %805 = select i1 %804, float 0x3FB99999A0000000, float %803
-  %806 = call float @llvm.AMDGPU.lrp(float %805, float %400, float %300)
-  %807 = call float @llvm.AMDGPU.lrp(float %805, float %401, float %301)
-  %808 = call float @llvm.AMDGPU.lrp(float %805, float %402, float %302)
-  %809 = call float @llvm.AMDGPU.lrp(float %805, float %403, float %303)
-  %810 = insertelement <4 x float> undef, float %329, i32 0
-  %811 = insertelement <4 x float> %810, float %330, i32 1
-  %812 = insertelement <4 x float> %811, float %331, i32 2
-  %813 = insertelement <4 x float> %812, float 0.000000e+00, i32 3
-  %814 = insertelement <4 x float> undef, float %63, i32 0
-  %815 = insertelement <4 x float> %814, float %65, i32 1
-  %816 = insertelement <4 x float> %815, float %67, i32 2
-  %817 = insertelement <4 x float> %816, float 0.000000e+00, i32 3
-  %818 = call float @llvm.AMDGPU.dp4(<4 x float> %813, <4 x float> %817)
-  %819 = fcmp uge float 0x3FEB333340000000, %818
-  %820 = select i1 %819, float 0x3FEB333340000000, float %818
-  %821 = fmul float %8, %820
-  %822 = fmul float %13, %820
-  %823 = fmul float %18, %820
-  %824 = insertelement <4 x float> undef, float %34, i32 0
-  %825 = insertelement <4 x float> %824, float %35, i32 1
-  %826 = insertelement <4 x float> %825, float %36, i32 2
-  %827 = insertelement <4 x float> %826, float 0.000000e+00, i32 3
-  %828 = insertelement <4 x float> undef, float %63, i32 0
-  %829 = insertelement <4 x float> %828, float %65, i32 1
-  %830 = insertelement <4 x float> %829, float %67, i32 2
-  %831 = insertelement <4 x float> %830, float 0.000000e+00, i32 3
-  %832 = call float @llvm.AMDGPU.dp4(<4 x float> %827, <4 x float> %831)
-  %833 = fcmp uge float 0x3FECCCCCC0000000, %832
-  %834 = select i1 %833, float 0x3FECCCCCC0000000, float %832
-  %835 = fmul float %821, %834
-  %836 = fmul float %822, %834
-  %837 = fmul float %823, %834
+  %776 = fmul float %result.i, 5.000000e-01
+  %777 = fcmp uge float 0x3FE4CCCCC0000000, %776
+  %778 = select i1 %777, float 0x3FE4CCCCC0000000, float %776
+  %779 = fcmp uge float %778, 0x3FB99999A0000000
+  %780 = select i1 %779, float 0x3FB99999A0000000, float %778
+  %one.sub.a.i73 = fsub float 1.000000e+00, %780
+  %one.sub.ac.i74 = fmul float %one.sub.a.i73, %299
+  %mul.i75 = fmul float %result.i172, %299
+  %result.i76 = fadd float %mul.i75, %one.sub.ac.i74
+  %one.sub.a.i69 = fsub float 1.000000e+00, %780
+  %one.sub.ac.i70 = fmul float %one.sub.a.i69, %300
+  %mul.i71 = fmul float %result.i168, %300
+  %result.i72 = fadd float %mul.i71, %one.sub.ac.i70
+  %one.sub.a.i65 = fsub float 1.000000e+00, %780
+  %one.sub.ac.i66 = fmul float %one.sub.a.i65, %301
+  %mul.i67 = fmul float %result.i164, %301
+  %result.i68 = fadd float %mul.i67, %one.sub.ac.i66
+  %one.sub.a.i61 = fsub float 1.000000e+00, %780
+  %one.sub.ac.i62 = fmul float %one.sub.a.i61, %302
+  %mul.i63 = fmul float %result.i160, %302
+  %result.i64 = fadd float %mul.i63, %one.sub.ac.i62
+  %781 = insertelement <4 x float> undef, float %328, i32 0
+  %782 = insertelement <4 x float> %781, float %329, i32 1
+  %783 = insertelement <4 x float> %782, float %330, i32 2
+  %784 = insertelement <4 x float> %783, float 0.000000e+00, i32 3
+  %785 = insertelement <4 x float> undef, float %63, i32 0
+  %786 = insertelement <4 x float> %785, float %65, i32 1
+  %787 = insertelement <4 x float> %786, float %67, i32 2
+  %788 = insertelement <4 x float> %787, float 0.000000e+00, i32 3
+  %789 = call float @llvm.AMDGPU.dp4(<4 x float> %784, <4 x float> %788)
+  %790 = fcmp uge float 0x3FEB333340000000, %789
+  %791 = select i1 %790, float 0x3FEB333340000000, float %789
+  %792 = fmul float %8, %791
+  %793 = fmul float %13, %791
+  %794 = fmul float %18, %791
+  %795 = insertelement <4 x float> undef, float %34, i32 0
+  %796 = insertelement <4 x float> %795, float %35, i32 1
+  %797 = insertelement <4 x float> %796, float %36, i32 2
+  %798 = insertelement <4 x float> %797, float 0.000000e+00, i32 3
+  %799 = insertelement <4 x float> undef, float %63, i32 0
+  %800 = insertelement <4 x float> %799, float %65, i32 1
+  %801 = insertelement <4 x float> %800, float %67, i32 2
+  %802 = insertelement <4 x float> %801, float 0.000000e+00, i32 3
+  %803 = call float @llvm.AMDGPU.dp4(<4 x float> %798, <4 x float> %802)
+  %804 = fcmp uge float 0x3FECCCCCC0000000, %803
+  %805 = select i1 %804, float 0x3FECCCCCC0000000, float %803
+  %806 = fmul float %792, %805
+  %807 = fmul float %793, %805
+  %808 = fmul float %794, %805
   br label %ENDIF172
 
-ENDIF172:                                         ; preds = %ENDIF169, %IF173
-  %temp84.4 = phi float [ %806, %IF173 ], [ %temp84.3, %ENDIF169 ]
-  %temp85.4 = phi float [ %807, %IF173 ], [ %temp85.3, %ENDIF169 ]
-  %temp86.4 = phi float [ %808, %IF173 ], [ %temp86.3, %ENDIF169 ]
-  %temp87.4 = phi float [ %809, %IF173 ], [ %temp87.3, %ENDIF169 ]
-  %temp92.10 = phi float [ %835, %IF173 ], [ %temp92.9, %ENDIF169 ]
-  %temp93.4 = phi float [ %836, %IF173 ], [ %temp93.3, %ENDIF169 ]
-  %temp94.4 = phi float [ %837, %IF173 ], [ %temp94.3, %ENDIF169 ]
-  %838 = fcmp oge float %179, 2.670000e+03
-  %839 = sext i1 %838 to i32
-  %840 = bitcast i32 %839 to float
-  %841 = bitcast float %840 to i32
-  %842 = icmp ne i32 %841, 0
-  br i1 %842, label %IF176, label %ENDIF175
+ENDIF172:                                         ; preds = %IF173, %ENDIF169
+  %temp84.4 = phi float [ %result.i76, %IF173 ], [ %temp84.3, %ENDIF169 ]
+  %temp85.4 = phi float [ %result.i72, %IF173 ], [ %temp85.3, %ENDIF169 ]
+  %temp86.4 = phi float [ %result.i68, %IF173 ], [ %temp86.3, %ENDIF169 ]
+  %temp87.4 = phi float [ %result.i64, %IF173 ], [ %temp87.3, %ENDIF169 ]
+  %temp92.10 = phi float [ %806, %IF173 ], [ %temp92.9, %ENDIF169 ]
+  %temp93.4 = phi float [ %807, %IF173 ], [ %temp93.3, %ENDIF169 ]
+  %temp94.4 = phi float [ %808, %IF173 ], [ %temp94.3, %ENDIF169 ]
+  %809 = fcmp oge float %179, 2.670000e+03
+  %810 = sext i1 %809 to i32
+  %811 = bitcast i32 %810 to float
+  %812 = bitcast float %811 to i32
+  %813 = icmp ne i32 %812, 0
+  br i1 %813, label %IF176, label %ENDIF175
 
 IF176:                                            ; preds = %ENDIF172
-  %843 = fmul float %202, 0x3FB99999A0000000
-  %844 = fcmp uge float 0.000000e+00, %843
-  %845 = select i1 %844, float 0.000000e+00, float %843
-  %846 = fcmp uge float %845, 0x3FD99999A0000000
-  %847 = select i1 %846, float 0x3FD99999A0000000, float %845
-  %848 = call float @llvm.AMDGPU.lrp(float %847, float %400, float %300)
-  %849 = call float @llvm.AMDGPU.lrp(float %847, float %401, float %301)
-  %850 = call float @llvm.AMDGPU.lrp(float %847, float %402, float %302)
-  %851 = call float @llvm.AMDGPU.lrp(float %847, float %403, float %303)
-  %852 = insertelement <4 x float> undef, float %329, i32 0
-  %853 = insertelement <4 x float> %852, float %330, i32 1
-  %854 = insertelement <4 x float> %853, float %331, i32 2
-  %855 = insertelement <4 x float> %854, float 0.000000e+00, i32 3
-  %856 = insertelement <4 x float> undef, float %63, i32 0
-  %857 = insertelement <4 x float> %856, float %65, i32 1
-  %858 = insertelement <4 x float> %857, float %67, i32 2
-  %859 = insertelement <4 x float> %858, float 0.000000e+00, i32 3
-  %860 = call float @llvm.AMDGPU.dp4(<4 x float> %855, <4 x float> %859)
-  %861 = fcmp uge float 0x3FEB333340000000, %860
-  %862 = select i1 %861, float 0x3FEB333340000000, float %860
-  %863 = fmul float %8, %862
-  %864 = fmul float %13, %862
-  %865 = fmul float %18, %862
-  %866 = insertelement <4 x float> undef, float %34, i32 0
-  %867 = insertelement <4 x float> %866, float %35, i32 1
-  %868 = insertelement <4 x float> %867, float %36, i32 2
-  %869 = insertelement <4 x float> %868, float 0.000000e+00, i32 3
-  %870 = insertelement <4 x float> undef, float %63, i32 0
-  %871 = insertelement <4 x float> %870, float %65, i32 1
-  %872 = insertelement <4 x float> %871, float %67, i32 2
-  %873 = insertelement <4 x float> %872, float 0.000000e+00, i32 3
-  %874 = call float @llvm.AMDGPU.dp4(<4 x float> %869, <4 x float> %873)
-  %875 = fcmp uge float 0x3FECCCCCC0000000, %874
-  %876 = select i1 %875, float 0x3FECCCCCC0000000, float %874
-  %877 = fmul float %863, %876
-  %878 = fmul float %864, %876
-  %879 = fmul float %865, %876
+  %814 = fmul float %result.i, 0x3FB99999A0000000
+  %815 = fcmp uge float 0.000000e+00, %814
+  %816 = select i1 %815, float 0.000000e+00, float %814
+  %817 = fcmp uge float %816, 0x3FD99999A0000000
+  %818 = select i1 %817, float 0x3FD99999A0000000, float %816
+  %one.sub.a.i57 = fsub float 1.000000e+00, %818
+  %one.sub.ac.i58 = fmul float %one.sub.a.i57, %299
+  %mul.i59 = fmul float %result.i172, %299
+  %result.i60 = fadd float %mul.i59, %one.sub.ac.i58
+  %one.sub.a.i53 = fsub float 1.000000e+00, %818
+  %one.sub.ac.i54 = fmul float %one.sub.a.i53, %300
+  %mul.i55 = fmul float %result.i168, %300
+  %result.i56 = fadd float %mul.i55, %one.sub.ac.i54
+  %one.sub.a.i49 = fsub float 1.000000e+00, %818
+  %one.sub.ac.i50 = fmul float %one.sub.a.i49, %301
+  %mul.i51 = fmul float %result.i164, %301
+  %result.i52 = fadd float %mul.i51, %one.sub.ac.i50
+  %one.sub.a.i45 = fsub float 1.000000e+00, %818
+  %one.sub.ac.i46 = fmul float %one.sub.a.i45, %302
+  %mul.i47 = fmul float %result.i160, %302
+  %result.i48 = fadd float %mul.i47, %one.sub.ac.i46
+  %819 = insertelement <4 x float> undef, float %328, i32 0
+  %820 = insertelement <4 x float> %819, float %329, i32 1
+  %821 = insertelement <4 x float> %820, float %330, i32 2
+  %822 = insertelement <4 x float> %821, float 0.000000e+00, i32 3
+  %823 = insertelement <4 x float> undef, float %63, i32 0
+  %824 = insertelement <4 x float> %823, float %65, i32 1
+  %825 = insertelement <4 x float> %824, float %67, i32 2
+  %826 = insertelement <4 x float> %825, float 0.000000e+00, i32 3
+  %827 = call float @llvm.AMDGPU.dp4(<4 x float> %822, <4 x float> %826)
+  %828 = fcmp uge float 0x3FEB333340000000, %827
+  %829 = select i1 %828, float 0x3FEB333340000000, float %827
+  %830 = fmul float %8, %829
+  %831 = fmul float %13, %829
+  %832 = fmul float %18, %829
+  %833 = insertelement <4 x float> undef, float %34, i32 0
+  %834 = insertelement <4 x float> %833, float %35, i32 1
+  %835 = insertelement <4 x float> %834, float %36, i32 2
+  %836 = insertelement <4 x float> %835, float 0.000000e+00, i32 3
+  %837 = insertelement <4 x float> undef, float %63, i32 0
+  %838 = insertelement <4 x float> %837, float %65, i32 1
+  %839 = insertelement <4 x float> %838, float %67, i32 2
+  %840 = insertelement <4 x float> %839, float 0.000000e+00, i32 3
+  %841 = call float @llvm.AMDGPU.dp4(<4 x float> %836, <4 x float> %840)
+  %842 = fcmp uge float 0x3FECCCCCC0000000, %841
+  %843 = select i1 %842, float 0x3FECCCCCC0000000, float %841
+  %844 = fmul float %830, %843
+  %845 = fmul float %831, %843
+  %846 = fmul float %832, %843
   br label %ENDIF175
 
-ENDIF175:                                         ; preds = %ENDIF172, %IF176
-  %temp84.5 = phi float [ %848, %IF176 ], [ %temp84.4, %ENDIF172 ]
-  %temp85.5 = phi float [ %849, %IF176 ], [ %temp85.4, %ENDIF172 ]
-  %temp86.5 = phi float [ %850, %IF176 ], [ %temp86.4, %ENDIF172 ]
-  %temp87.5 = phi float [ %851, %IF176 ], [ %temp87.4, %ENDIF172 ]
-  %temp92.11 = phi float [ %877, %IF176 ], [ %temp92.10, %ENDIF172 ]
-  %temp93.5 = phi float [ %878, %IF176 ], [ %temp93.4, %ENDIF172 ]
-  %temp94.5 = phi float [ %879, %IF176 ], [ %temp94.4, %ENDIF172 ]
-  %880 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %881 = extractelement <4 x float> %880, i32 0
-  %882 = fcmp olt float %881, %179
-  %883 = sext i1 %882 to i32
-  %884 = bitcast i32 %883 to float
-  %885 = bitcast float %884 to i32
-  %886 = icmp ne i32 %885, 0
-  br i1 %886, label %IF179, label %ENDIF178
+ENDIF175:                                         ; preds = %IF176, %ENDIF172
+  %temp84.5 = phi float [ %result.i60, %IF176 ], [ %temp84.4, %ENDIF172 ]
+  %temp85.5 = phi float [ %result.i56, %IF176 ], [ %temp85.4, %ENDIF172 ]
+  %temp86.5 = phi float [ %result.i52, %IF176 ], [ %temp86.4, %ENDIF172 ]
+  %temp87.5 = phi float [ %result.i48, %IF176 ], [ %temp87.4, %ENDIF172 ]
+  %temp92.11 = phi float [ %844, %IF176 ], [ %temp92.10, %ENDIF172 ]
+  %temp93.5 = phi float [ %845, %IF176 ], [ %temp93.4, %ENDIF172 ]
+  %temp94.5 = phi float [ %846, %IF176 ], [ %temp94.4, %ENDIF172 ]
+  %847 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %848 = extractelement <4 x float> %847, i32 0
+  %849 = fcmp olt float %848, %179
+  %850 = sext i1 %849 to i32
+  %851 = bitcast i32 %850 to float
+  %852 = bitcast float %851 to i32
+  %853 = icmp ne i32 %852, 0
+  br i1 %853, label %IF179, label %ENDIF178
 
 IF179:                                            ; preds = %ENDIF175
-  %887 = fadd float %202, 1.000000e+00
-  %888 = fadd float %202, 1.000000e+00
-  %889 = fadd float %202, 1.000000e+00
-  %890 = insertelement <4 x float> undef, float %43, i32 0
-  %891 = insertelement <4 x float> %890, float %44, i32 1
-  %892 = insertelement <4 x float> %891, float %45, i32 2
-  %893 = insertelement <4 x float> %892, float 0.000000e+00, i32 3
-  %894 = insertelement <4 x float> undef, float %43, i32 0
-  %895 = insertelement <4 x float> %894, float %44, i32 1
-  %896 = insertelement <4 x float> %895, float %45, i32 2
-  %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3
-  %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897)
-  %899 = call float @llvm.AMDGPU.rsq.clamped.f32(float %898)
-  %900 = fmul float %45, %899
-  %901 = call float @fabs(float %900)
-  %902 = fmul float %176, 0x3FECCCCCC0000000
-  %903 = fadd float %902, %901
-  %904 = fadd float %903, 0xBFEFAE1480000000
-  %905 = fmul float %904, 0xC043FFFE20000000
-  %906 = call float @llvm.AMDIL.clamp.(float %905, float 0.000000e+00, float 1.000000e+00)
-  %907 = fmul float 2.000000e+00, %906
-  %908 = fsub float -0.000000e+00, %907
-  %909 = fadd float 3.000000e+00, %908
-  %910 = fmul float %906, %909
-  %911 = fmul float %906, %910
-  %912 = call float @llvm.AMDGPU.lrp(float %911, float %temp84.5, float %887)
-  %913 = call float @llvm.AMDGPU.lrp(float %911, float %temp85.5, float %888)
-  %914 = call float @llvm.AMDGPU.lrp(float %911, float %temp86.5, float %889)
-  %915 = call float @llvm.AMDGPU.lrp(float %911, float %temp87.5, float 0.000000e+00)
-  %916 = fmul float %202, 5.000000e-01
-  %917 = fcmp uge float 0x3FE4CCCCC0000000, %916
-  %918 = select i1 %917, float 0x3FE4CCCCC0000000, float %916
-  %919 = fcmp uge float %918, 0x3FE3333340000000
-  %920 = select i1 %919, float 0x3FE3333340000000, float %918
-  %921 = call float @llvm.AMDGPU.lrp(float %920, float %912, float %temp84.5)
-  %922 = call float @llvm.AMDGPU.lrp(float %920, float %913, float %temp85.5)
-  %923 = call float @llvm.AMDGPU.lrp(float %920, float %914, float %temp86.5)
-  %924 = call float @llvm.AMDGPU.lrp(float %920, float %915, float %temp87.5)
-  %925 = insertelement <4 x float> undef, float %329, i32 0
-  %926 = insertelement <4 x float> %925, float %330, i32 1
-  %927 = insertelement <4 x float> %926, float %331, i32 2
-  %928 = insertelement <4 x float> %927, float 0.000000e+00, i32 3
-  %929 = insertelement <4 x float> undef, float %63, i32 0
-  %930 = insertelement <4 x float> %929, float %65, i32 1
-  %931 = insertelement <4 x float> %930, float %67, i32 2
-  %932 = insertelement <4 x float> %931, float 0.000000e+00, i32 3
-  %933 = call float @llvm.AMDGPU.dp4(<4 x float> %928, <4 x float> %932)
-  %934 = fcmp uge float 0x3FE99999A0000000, %933
-  %935 = select i1 %934, float 0x3FE99999A0000000, float %933
-  %936 = fmul float %8, %935
-  %937 = fmul float %13, %935
-  %938 = fmul float %18, %935
-  %939 = insertelement <4 x float> undef, float %34, i32 0
-  %940 = insertelement <4 x float> %939, float %35, i32 1
-  %941 = insertelement <4 x float> %940, float %36, i32 2
-  %942 = insertelement <4 x float> %941, float 0.000000e+00, i32 3
-  %943 = insertelement <4 x float> undef, float %63, i32 0
-  %944 = insertelement <4 x float> %943, float %65, i32 1
-  %945 = insertelement <4 x float> %944, float %67, i32 2
-  %946 = insertelement <4 x float> %945, float 0.000000e+00, i32 3
-  %947 = call float @llvm.AMDGPU.dp4(<4 x float> %942, <4 x float> %946)
-  %948 = fcmp uge float 0x3FECCCCCC0000000, %947
-  %949 = select i1 %948, float 0x3FECCCCCC0000000, float %947
-  %950 = fmul float %936, %949
-  %951 = fmul float %937, %949
-  %952 = fmul float %938, %949
+  %854 = fadd float %result.i, 1.000000e+00
+  %855 = fadd float %result.i, 1.000000e+00
+  %856 = fadd float %result.i, 1.000000e+00
+  %857 = insertelement <4 x float> undef, float %43, i32 0
+  %858 = insertelement <4 x float> %857, float %44, i32 1
+  %859 = insertelement <4 x float> %858, float %45, i32 2
+  %860 = insertelement <4 x float> %859, float 0.000000e+00, i32 3
+  %861 = insertelement <4 x float> undef, float %43, i32 0
+  %862 = insertelement <4 x float> %861, float %44, i32 1
+  %863 = insertelement <4 x float> %862, float %45, i32 2
+  %864 = insertelement <4 x float> %863, float 0.000000e+00, i32 3
+  %865 = call float @llvm.AMDGPU.dp4(<4 x float> %860, <4 x float> %864)
+  %866 = call float @llvm.AMDGPU.rsq.clamped.f32(float %865)
+  %867 = fmul float %45, %866
+  %868 = call float @fabs(float %867)
+  %869 = fmul float %176, 0x3FECCCCCC0000000
+  %870 = fadd float %869, %868
+  %871 = fadd float %870, 0xBFEFAE1480000000
+  %872 = fmul float %871, 0xC043FFFE20000000
+  %873 = call float @llvm.AMDIL.clamp.(float %872, float 0.000000e+00, float 1.000000e+00)
+  %874 = fmul float 2.000000e+00, %873
+  %875 = fsub float -0.000000e+00, %874
+  %876 = fadd float 3.000000e+00, %875
+  %877 = fmul float %873, %876
+  %878 = fmul float %873, %877
+  %one.sub.a.i41 = fsub float 1.000000e+00, %878
+  %one.sub.ac.i42 = fmul float %one.sub.a.i41, %854
+  %mul.i43 = fmul float %temp84.5, %854
+  %result.i44 = fadd float %mul.i43, %one.sub.ac.i42
+  %one.sub.a.i37 = fsub float 1.000000e+00, %878
+  %one.sub.ac.i38 = fmul float %one.sub.a.i37, %855
+  %mul.i39 = fmul float %temp85.5, %855
+  %result.i40 = fadd float %mul.i39, %one.sub.ac.i38
+  %one.sub.a.i33 = fsub float 1.000000e+00, %878
+  %one.sub.ac.i34 = fmul float %one.sub.a.i33, %856
+  %mul.i35 = fmul float %temp86.5, %856
+  %result.i36 = fadd float %mul.i35, %one.sub.ac.i34
+  %one.sub.a.i29 = fsub float 1.000000e+00, %878
+  %one.sub.ac.i30 = fmul float %one.sub.a.i29, 0.000000e+00
+  %mul.i31 = fmul float %temp87.5, 0.000000e+00
+  %result.i32 = fadd float %mul.i31, %one.sub.ac.i30
+  %879 = fmul float %result.i, 5.000000e-01
+  %880 = fcmp uge float 0x3FE4CCCCC0000000, %879
+  %881 = select i1 %880, float 0x3FE4CCCCC0000000, float %879
+  %882 = fcmp uge float %881, 0x3FE3333340000000
+  %883 = select i1 %882, float 0x3FE3333340000000, float %881
+  %one.sub.a.i25 = fsub float 1.000000e+00, %883
+  %one.sub.ac.i26 = fmul float %one.sub.a.i25, %temp84.5
+  %mul.i27 = fmul float %result.i44, %temp84.5
+  %result.i28 = fadd float %mul.i27, %one.sub.ac.i26
+  %one.sub.a.i21 = fsub float 1.000000e+00, %883
+  %one.sub.ac.i22 = fmul float %one.sub.a.i21, %temp85.5
+  %mul.i23 = fmul float %result.i40, %temp85.5
+  %result.i24 = fadd float %mul.i23, %one.sub.ac.i22
+  %one.sub.a.i17 = fsub float 1.000000e+00, %883
+  %one.sub.ac.i18 = fmul float %one.sub.a.i17, %temp86.5
+  %mul.i19 = fmul float %result.i36, %temp86.5
+  %result.i20 = fadd float %mul.i19, %one.sub.ac.i18
+  %one.sub.a.i13 = fsub float 1.000000e+00, %883
+  %one.sub.ac.i14 = fmul float %one.sub.a.i13, %temp87.5
+  %mul.i15 = fmul float %result.i32, %temp87.5
+  %result.i16 = fadd float %mul.i15, %one.sub.ac.i14
+  %884 = insertelement <4 x float> undef, float %328, i32 0
+  %885 = insertelement <4 x float> %884, float %329, i32 1
+  %886 = insertelement <4 x float> %885, float %330, i32 2
+  %887 = insertelement <4 x float> %886, float 0.000000e+00, i32 3
+  %888 = insertelement <4 x float> undef, float %63, i32 0
+  %889 = insertelement <4 x float> %888, float %65, i32 1
+  %890 = insertelement <4 x float> %889, float %67, i32 2
+  %891 = insertelement <4 x float> %890, float 0.000000e+00, i32 3
+  %892 = call float @llvm.AMDGPU.dp4(<4 x float> %887, <4 x float> %891)
+  %893 = fcmp uge float 0x3FE99999A0000000, %892
+  %894 = select i1 %893, float 0x3FE99999A0000000, float %892
+  %895 = fmul float %8, %894
+  %896 = fmul float %13, %894
+  %897 = fmul float %18, %894
+  %898 = insertelement <4 x float> undef, float %34, i32 0
+  %899 = insertelement <4 x float> %898, float %35, i32 1
+  %900 = insertelement <4 x float> %899, float %36, i32 2
+  %901 = insertelement <4 x float> %900, float 0.000000e+00, i32 3
+  %902 = insertelement <4 x float> undef, float %63, i32 0
+  %903 = insertelement <4 x float> %902, float %65, i32 1
+  %904 = insertelement <4 x float> %903, float %67, i32 2
+  %905 = insertelement <4 x float> %904, float 0.000000e+00, i32 3
+  %906 = call float @llvm.AMDGPU.dp4(<4 x float> %901, <4 x float> %905)
+  %907 = fcmp uge float 0x3FECCCCCC0000000, %906
+  %908 = select i1 %907, float 0x3FECCCCCC0000000, float %906
+  %909 = fmul float %895, %908
+  %910 = fmul float %896, %908
+  %911 = fmul float %897, %908
   br label %ENDIF178
 
-ENDIF178:                                         ; preds = %ENDIF175, %IF179
-  %temp84.6 = phi float [ %921, %IF179 ], [ %temp84.5, %ENDIF175 ]
-  %temp85.6 = phi float [ %922, %IF179 ], [ %temp85.5, %ENDIF175 ]
-  %temp86.6 = phi float [ %923, %IF179 ], [ %temp86.5, %ENDIF175 ]
-  %temp87.6 = phi float [ %924, %IF179 ], [ %temp87.5, %ENDIF175 ]
-  %temp92.12 = phi float [ %950, %IF179 ], [ %temp92.11, %ENDIF175 ]
-  %temp93.6 = phi float [ %951, %IF179 ], [ %temp93.5, %ENDIF175 ]
-  %temp94.6 = phi float [ %952, %IF179 ], [ %temp94.5, %ENDIF175 ]
-  %953 = fmul float %55, %temp92.12
-  %954 = fmul float %57, %temp93.6
-  %955 = fmul float %59, %temp94.6
-  %956 = fmul float %61, 0.000000e+00
-  %957 = fmul float %temp84.6, %953
-  %958 = fmul float %temp85.6, %954
-  %959 = fmul float %temp86.6, %955
-  %960 = fmul float %temp87.6, %956
-  %961 = fmul float %2, -2.000000e+00
-  %962 = fadd float %961, 1.000000e+00
-  %963 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
-  %964 = extractelement <4 x float> %963, i32 2
-  %965 = fsub float -0.000000e+00, %964
-  %966 = fadd float %962, %965
-  %967 = fdiv float 1.000000e+00, %966
-  %968 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
-  %969 = extractelement <4 x float> %968, i32 2
-  %970 = fmul float %969, %967
-  %971 = fsub float -0.000000e+00, %53
-  %972 = fmul float %971, %53
-  %973 = fmul float %972, %970
-  %974 = fmul float %973, %970
-  %975 = fmul float %974, 0x3FF7154760000000
-  %976 = call float @llvm.AMDIL.exp.(float %975)
-  %977 = fcmp oeq float %53, 1.000000e+00
-  %978 = sext i1 %977 to i32
-  %979 = bitcast i32 %978 to float
-  %980 = bitcast float %979 to i32
-  %981 = icmp ne i32 %980, 0
-  %.184 = select i1 %981, float 1.000000e+00, float %976
-  %982 = call float @llvm.AMDGPU.lrp(float %.184, float %957, float %47)
-  %983 = call float @llvm.AMDGPU.lrp(float %.184, float %958, float %49)
-  %984 = call float @llvm.AMDGPU.lrp(float %.184, float %959, float %51)
-  %985 = insertelement <4 x float> undef, float %982, i32 0
-  %986 = insertelement <4 x float> %985, float %983, i32 1
-  %987 = insertelement <4 x float> %986, float %984, i32 2
-  %988 = insertelement <4 x float> %987, float %960, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %988, i32 0, i32 0)
+ENDIF178:                                         ; preds = %IF179, %ENDIF175
+  %temp84.6 = phi float [ %result.i28, %IF179 ], [ %temp84.5, %ENDIF175 ]
+  %temp85.6 = phi float [ %result.i24, %IF179 ], [ %temp85.5, %ENDIF175 ]
+  %temp86.6 = phi float [ %result.i20, %IF179 ], [ %temp86.5, %ENDIF175 ]
+  %temp87.6 = phi float [ %result.i16, %IF179 ], [ %temp87.5, %ENDIF175 ]
+  %temp92.12 = phi float [ %909, %IF179 ], [ %temp92.11, %ENDIF175 ]
+  %temp93.6 = phi float [ %910, %IF179 ], [ %temp93.5, %ENDIF175 ]
+  %temp94.6 = phi float [ %911, %IF179 ], [ %temp94.5, %ENDIF175 ]
+  %912 = fmul float %55, %temp92.12
+  %913 = fmul float %57, %temp93.6
+  %914 = fmul float %59, %temp94.6
+  %915 = fmul float %61, 0.000000e+00
+  %916 = fmul float %temp84.6, %912
+  %917 = fmul float %temp85.6, %913
+  %918 = fmul float %temp86.6, %914
+  %919 = fmul float %temp87.6, %915
+  %920 = fmul float %2, -2.000000e+00
+  %921 = fadd float %920, 1.000000e+00
+  %922 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %923 = extractelement <4 x float> %922, i32 2
+  %924 = fsub float -0.000000e+00, %923
+  %925 = fadd float %921, %924
+  %926 = fdiv float 1.000000e+00, %925
+  %927 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+  %928 = extractelement <4 x float> %927, i32 2
+  %929 = fmul float %928, %926
+  %930 = fsub float -0.000000e+00, %53
+  %931 = fmul float %930, %53
+  %932 = fmul float %931, %929
+  %933 = fmul float %932, %929
+  %934 = fmul float %933, 0x3FF7154760000000
+  %935 = call float @llvm.AMDIL.exp.(float %934)
+  %936 = fcmp oeq float %53, 1.000000e+00
+  %937 = sext i1 %936 to i32
+  %938 = bitcast i32 %937 to float
+  %939 = bitcast float %938 to i32
+  %940 = icmp ne i32 %939, 0
+  %.184 = select i1 %940, float 1.000000e+00, float %935
+  %one.sub.a.i9 = fsub float 1.000000e+00, %.184
+  %one.sub.ac.i10 = fmul float %one.sub.a.i9, %47
+  %mul.i11 = fmul float %916, %47
+  %result.i12 = fadd float %mul.i11, %one.sub.ac.i10
+  %one.sub.a.i5 = fsub float 1.000000e+00, %.184
+  %one.sub.ac.i6 = fmul float %one.sub.a.i5, %49
+  %mul.i7 = fmul float %917, %49
+  %result.i8 = fadd float %mul.i7, %one.sub.ac.i6
+  %one.sub.a.i1 = fsub float 1.000000e+00, %.184
+  %one.sub.ac.i2 = fmul float %one.sub.a.i1, %51
+  %mul.i3 = fmul float %918, %51
+  %result.i4 = fadd float %mul.i3, %one.sub.ac.i2
+  %941 = insertelement <4 x float> undef, float %result.i12, i32 0
+  %942 = insertelement <4 x float> %941, float %result.i8, i32 1
+  %943 = insertelement <4 x float> %942, float %result.i4, i32 2
+  %944 = insertelement <4 x float> %943, float %919, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %944, i32 0, i32 0)
   ret void
 }
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #2
 
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
+; Function Attrs: nounwind readnone
+declare float @llvm.AMDGPU.rsq.clamped.f32(float) #3
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #2
 
 ; Function Attrs: readonly
-declare float @fabs(float) #2
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.exp.(float) #1
+declare float @fabs(float) #4
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.lrp(float, float, float) #1
+declare float @llvm.AMDIL.exp.(float) #2
 
 ; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #1
+declare float @llvm.AMDIL.clamp.(float, float, float) #2
 
 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { readnone }
-attributes #2 = { readonly }
+attributes #0 = { alwaysinline nounwind readnone }
+attributes #1 = { "ShaderType"="0" }
+attributes #2 = { readnone }
+attributes #3 = { nounwind readnone }
+attributes #4 = { readonly }

Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll?rev=258611&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.legacy.rsq.ll (removed)
@@ -1,13 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone
-
-; FUNC-LABEL: {{^}}rsq_legacy_f32:
-; SI: v_rsq_legacy_f32_e32
-; EG: RECIPSQRT_IEEE
-define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
-  %rsq = call float @llvm.AMDGPU.legacy.rsq(float %src) nounwind readnone
-  store float %rsq, float addrspace(1)* %out, align 4
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll?rev=258611&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.mul.ll (removed)
@@ -1,17 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @test(<4 x float> inreg %reg0) #0 {
-   %r0 = extractelement <4 x float> %reg0, i32 0
-   %r1 = extractelement <4 x float> %reg0, i32 1
-   %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
-   %vec = insertelement <4 x float> undef, float %r2, i32 0
-   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
-   ret void
-}
-
-declare float @llvm.AMDGPU.mul(float ,float ) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="0" }
\ No newline at end of file

Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll?rev=258611&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll (removed)
@@ -1,13 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone
-
-; FUNC-LABEL: {{^}}test_lrp:
-; SI: v_mad_f32
-; SI: v_mac_f32_e32
-define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
-  %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
-  store float %mad, float addrspace(1)* %out, align 4
-  ret void
-}

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Fri Jan 22 23:42:38 2016
@@ -4,6 +4,8 @@
 ; These tests check that the compiler won't crash when it needs to spill
 ; SGPRs.
 
+@ddxy_lds = external addrspace(3) global [64 x i32]
+
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: s_wqm
 
@@ -19,9 +21,7 @@
 ; Writing to M0 from an SMRD instruction will hang the GPU.
 ; CHECK-NOT: s_buffer_load_dword m0
 ; CHECK: s_endpgm
-@ddxy_lds = external addrspace(3) global [64 x i32]
-
-define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
 main_body:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
   %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
@@ -657,52 +657,9 @@ ENDIF66:
   br label %LOOP65
 }
 
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
-
-; Function Attrs: readnone
-declare i32 @llvm.SI.tid() #2
-
-; Function Attrs: readonly
-declare float @ceil(float) #3
-
-; Function Attrs: readnone
-declare float @llvm.amdgcn.rsq.f32(float) #2
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
-
-; Function Attrs: readnone
-declare float @fabs(float) #2
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
-
-; Function Attrs: nounwind readonly
-declare float @llvm.pow.f32(float, float) #4
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
-attributes #3 = { readonly }
-attributes #4 = { nounwind readonly }
-
-!0 = !{!"const", null, i32 1}
-
 ; CHECK-LABEL: {{^}}main1:
 ; CHECK: s_endpgm
-define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
 main_body:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
   %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
@@ -918,425 +875,439 @@ main_body:
   %231 = fmul float %227, 0x4012611180000000
   %232 = fmul float %228, 0x4012611180000000
   %233 = fmul float %229, 0x4012611180000000
-  %234 = call float @llvm.AMDGPU.lrp(float %27, float %231, float 1.000000e+00)
-  %235 = call float @llvm.AMDGPU.lrp(float %27, float %232, float 1.000000e+00)
-  %236 = call float @llvm.AMDGPU.lrp(float %27, float %233, float 1.000000e+00)
-  %237 = fmul float %216, %184
-  %238 = fmul float %217, %185
-  %239 = fadd float %238, %237
-  %240 = fmul float %218, %186
-  %241 = fadd float %239, %240
-  %242 = fmul float %216, %187
-  %243 = fmul float %217, %188
-  %244 = fadd float %243, %242
-  %245 = fmul float %218, %189
-  %246 = fadd float %244, %245
-  %247 = fmul float %216, %190
-  %248 = fmul float %217, %191
-  %249 = fadd float %248, %247
-  %250 = fmul float %218, %192
-  %251 = fadd float %249, %250
-  %252 = call float @llvm.AMDIL.clamp.(float %251, float 0.000000e+00, float 1.000000e+00)
-  %253 = fmul float %214, 0x3F5A36E2E0000000
-  %254 = call float @llvm.AMDIL.clamp.(float %253, float 0.000000e+00, float 1.000000e+00)
-  %255 = fsub float -0.000000e+00, %254
-  %256 = fadd float 1.000000e+00, %255
-  %257 = call float @llvm.pow.f32(float %252, float 2.500000e-01)
-  %258 = fmul float %39, %257
-  %259 = fmul float %241, %258
-  %260 = fmul float %246, %258
-  %261 = fmul float %259, %230
-  %262 = fmul float %260, %230
-  %263 = fadd float %252, 0x3EE4F8B580000000
-  %264 = fsub float -0.000000e+00, %252
-  %265 = fadd float 1.000000e+00, %264
-  %266 = fmul float 1.200000e+01, %265
-  %267 = fadd float %266, 4.000000e+00
-  %268 = fsub float -0.000000e+00, %267
-  %269 = fmul float %268, %263
-  %270 = fsub float -0.000000e+00, %267
-  %271 = fmul float %270, %263
-  %272 = fsub float -0.000000e+00, %267
-  %273 = fmul float %272, %263
-  %274 = fdiv float 1.000000e+00, %269
-  %275 = fdiv float 1.000000e+00, %271
-  %276 = fdiv float 1.000000e+00, %273
-  %277 = fmul float %261, %274
-  %278 = fmul float %262, %275
-  %279 = fmul float %263, %276
+  %one.sub.a.i = fsub float 1.000000e+00, %27
+  %result.i = fadd float %231, %one.sub.a.i
+  %one.sub.a.i43 = fsub float 1.000000e+00, %27
+  %result.i44 = fadd float %232, %one.sub.a.i43
+  %one.sub.a.i41 = fsub float 1.000000e+00, %27
+  %result.i42 = fadd float %233, %one.sub.a.i41
+  %234 = fmul float %216, %184
+  %235 = fmul float %217, %185
+  %236 = fadd float %235, %234
+  %237 = fmul float %218, %186
+  %238 = fadd float %236, %237
+  %239 = fmul float %216, %187
+  %240 = fmul float %217, %188
+  %241 = fadd float %240, %239
+  %242 = fmul float %218, %189
+  %243 = fadd float %241, %242
+  %244 = fmul float %216, %190
+  %245 = fmul float %217, %191
+  %246 = fadd float %245, %244
+  %247 = fmul float %218, %192
+  %248 = fadd float %246, %247
+  %249 = call float @llvm.AMDIL.clamp.(float %248, float 0.000000e+00, float 1.000000e+00)
+  %250 = fmul float %214, 0x3F5A36E2E0000000
+  %251 = call float @llvm.AMDIL.clamp.(float %250, float 0.000000e+00, float 1.000000e+00)
+  %252 = fsub float -0.000000e+00, %251
+  %253 = fadd float 1.000000e+00, %252
+  %254 = call float @llvm.pow.f32(float %249, float 2.500000e-01)
+  %255 = fmul float %39, %254
+  %256 = fmul float %238, %255
+  %257 = fmul float %243, %255
+  %258 = fmul float %256, %230
+  %259 = fmul float %257, %230
+  %260 = fadd float %249, 0x3EE4F8B580000000
+  %261 = fsub float -0.000000e+00, %249
+  %262 = fadd float 1.000000e+00, %261
+  %263 = fmul float 1.200000e+01, %262
+  %264 = fadd float %263, 4.000000e+00
+  %265 = fsub float -0.000000e+00, %264
+  %266 = fmul float %265, %260
+  %267 = fsub float -0.000000e+00, %264
+  %268 = fmul float %267, %260
+  %269 = fsub float -0.000000e+00, %264
+  %270 = fmul float %269, %260
+  %271 = fdiv float 1.000000e+00, %266
+  %272 = fdiv float 1.000000e+00, %268
+  %273 = fdiv float 1.000000e+00, %270
+  %274 = fmul float %258, %271
+  %275 = fmul float %259, %272
+  %276 = fmul float %260, %273
   br label %LOOP
 
 LOOP:                                             ; preds = %LOOP, %main_body
-  %temp144.0 = phi float [ 1.000000e+00, %main_body ], [ %292, %LOOP ]
-  %temp168.0 = phi float [ %176, %main_body ], [ %288, %LOOP ]
-  %temp169.0 = phi float [ %177, %main_body ], [ %289, %LOOP ]
-  %temp170.0 = phi float [ %256, %main_body ], [ %290, %LOOP ]
-  %280 = bitcast float %temp168.0 to i32
-  %281 = bitcast float %temp169.0 to i32
-  %282 = insertelement <4 x i32> undef, i32 %280, i32 0
-  %283 = insertelement <4 x i32> %282, i32 %281, i32 1
-  %284 = insertelement <4 x i32> %283, i32 0, i32 2
-  %285 = insertelement <4 x i32> %284, i32 undef, i32 3
-  %286 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %285, <32 x i8> %147, <16 x i8> %149, i32 2)
-  %287 = extractelement <4 x float> %286, i32 3
-  %288 = fadd float %temp168.0, %277
-  %289 = fadd float %temp169.0, %278
-  %290 = fadd float %temp170.0, %279
-  %291 = fsub float -0.000000e+00, %287
-  %292 = fadd float %290, %291
-  %293 = fcmp oge float 0.000000e+00, %292
-  %294 = sext i1 %293 to i32
-  %295 = bitcast i32 %294 to float
-  %296 = bitcast float %295 to i32
-  %297 = icmp ne i32 %296, 0
-  br i1 %297, label %IF189, label %LOOP
+  %temp144.0 = phi float [ 1.000000e+00, %main_body ], [ %289, %LOOP ]
+  %temp168.0 = phi float [ %176, %main_body ], [ %285, %LOOP ]
+  %temp169.0 = phi float [ %177, %main_body ], [ %286, %LOOP ]
+  %temp170.0 = phi float [ %253, %main_body ], [ %287, %LOOP ]
+  %277 = bitcast float %temp168.0 to i32
+  %278 = bitcast float %temp169.0 to i32
+  %279 = insertelement <4 x i32> undef, i32 %277, i32 0
+  %280 = insertelement <4 x i32> %279, i32 %278, i32 1
+  %281 = insertelement <4 x i32> %280, i32 0, i32 2
+  %282 = insertelement <4 x i32> %281, i32 undef, i32 3
+  %283 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %282, <32 x i8> %147, <16 x i8> %149, i32 2)
+  %284 = extractelement <4 x float> %283, i32 3
+  %285 = fadd float %temp168.0, %274
+  %286 = fadd float %temp169.0, %275
+  %287 = fadd float %temp170.0, %276
+  %288 = fsub float -0.000000e+00, %284
+  %289 = fadd float %287, %288
+  %290 = fcmp oge float 0.000000e+00, %289
+  %291 = sext i1 %290 to i32
+  %292 = bitcast i32 %291 to float
+  %293 = bitcast float %292 to i32
+  %294 = icmp ne i32 %293, 0
+  br i1 %294, label %IF189, label %LOOP
 
 IF189:                                            ; preds = %LOOP
-  %298 = extractelement <4 x float> %286, i32 0
-  %299 = extractelement <4 x float> %286, i32 1
-  %300 = extractelement <4 x float> %286, i32 2
-  %301 = fsub float -0.000000e+00, %292
-  %302 = fadd float %temp144.0, %301
-  %303 = fdiv float 1.000000e+00, %302
-  %304 = fmul float %292, %303
-  %305 = fadd float %304, -1.000000e+00
-  %306 = fmul float %305, %277
-  %307 = fadd float %306, %288
-  %308 = fmul float %305, %278
-  %309 = fadd float %308, %289
-  %310 = fsub float -0.000000e+00, %176
-  %311 = fadd float %307, %310
-  %312 = fsub float -0.000000e+00, %177
-  %313 = fadd float %309, %312
-  %314 = fadd float %176, %311
-  %315 = fadd float %177, %313
-  %316 = fmul float %311, %67
-  %317 = fmul float %313, %68
-  %318 = fmul float %316, %55
-  %319 = fmul float %316, %56
-  %320 = fmul float %317, %57
-  %321 = fadd float %320, %318
-  %322 = fmul float %317, %58
-  %323 = fadd float %322, %319
-  %324 = fadd float %178, %321
-  %325 = fadd float %179, %323
-  %326 = fmul float %316, %59
-  %327 = fmul float %316, %60
-  %328 = fmul float %316, %61
-  %329 = fmul float %316, %62
-  %330 = fmul float %317, %63
-  %331 = fadd float %330, %326
-  %332 = fmul float %317, %64
-  %333 = fadd float %332, %327
-  %334 = fmul float %317, %65
-  %335 = fadd float %334, %328
-  %336 = fmul float %317, %66
-  %337 = fadd float %336, %329
-  %338 = fadd float %168, %331
-  %339 = fadd float %169, %333
-  %340 = fadd float %170, %335
-  %341 = fadd float %171, %337
-  %342 = bitcast float %338 to i32
-  %343 = bitcast float %339 to i32
-  %344 = insertelement <2 x i32> undef, i32 %342, i32 0
-  %345 = insertelement <2 x i32> %344, i32 %343, i32 1
-  %346 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %345, <32 x i8> %135, <16 x i8> %137, i32 2)
-  %347 = extractelement <4 x float> %346, i32 0
-  %348 = extractelement <4 x float> %346, i32 1
-  %349 = extractelement <4 x float> %346, i32 2
-  %350 = extractelement <4 x float> %346, i32 3
-  %351 = fmul float %347, %23
-  %352 = fmul float %348, %24
-  %353 = fmul float %349, %25
-  %354 = fmul float %350, %26
-  %355 = fmul float %351, %180
-  %356 = fmul float %352, %181
-  %357 = fmul float %353, %182
-  %358 = fmul float %354, %183
-  %359 = fsub float -0.000000e+00, %350
-  %360 = fadd float 1.000000e+00, %359
-  %361 = fmul float %360, %49
-  %362 = call float @llvm.AMDGPU.lrp(float %361, float %347, float %355)
-  %363 = call float @llvm.AMDGPU.lrp(float %361, float %348, float %356)
-  %364 = call float @llvm.AMDGPU.lrp(float %361, float %349, float %357)
-  %365 = bitcast float %340 to i32
-  %366 = bitcast float %341 to i32
-  %367 = insertelement <2 x i32> undef, i32 %365, i32 0
-  %368 = insertelement <2 x i32> %367, i32 %366, i32 1
-  %369 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %368, <32 x i8> %151, <16 x i8> %153, i32 2)
-  %370 = extractelement <4 x float> %369, i32 2
-  %371 = fmul float %362, %234
-  %372 = fmul float %363, %235
-  %373 = fmul float %364, %236
-  %374 = fmul float %358, %230
-  %375 = bitcast float %314 to i32
-  %376 = bitcast float %315 to i32
-  %377 = insertelement <2 x i32> undef, i32 %375, i32 0
-  %378 = insertelement <2 x i32> %377, i32 %376, i32 1
-  %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %378, <32 x i8> %139, <16 x i8> %141, i32 2)
-  %380 = extractelement <4 x float> %379, i32 0
-  %381 = extractelement <4 x float> %379, i32 1
-  %382 = extractelement <4 x float> %379, i32 2
-  %383 = extractelement <4 x float> %379, i32 3
-  %384 = fcmp olt float 0.000000e+00, %382
-  %385 = sext i1 %384 to i32
-  %386 = bitcast i32 %385 to float
-  %387 = bitcast float %386 to i32
-  %388 = icmp ne i32 %387, 0
-  %.224 = select i1 %388, float %381, float %380
-  %.225 = select i1 %388, float %383, float %381
-  %389 = bitcast float %324 to i32
-  %390 = bitcast float %325 to i32
-  %391 = insertelement <2 x i32> undef, i32 %389, i32 0
-  %392 = insertelement <2 x i32> %391, i32 %390, i32 1
-  %393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %392, <32 x i8> %143, <16 x i8> %145, i32 2)
-  %394 = extractelement <4 x float> %393, i32 0
-  %395 = extractelement <4 x float> %393, i32 1
-  %396 = extractelement <4 x float> %393, i32 2
-  %397 = extractelement <4 x float> %393, i32 3
-  %398 = fcmp olt float 0.000000e+00, %396
-  %399 = sext i1 %398 to i32
-  %400 = bitcast i32 %399 to float
-  %401 = bitcast float %400 to i32
-  %402 = icmp ne i32 %401, 0
-  %temp112.1 = select i1 %402, float %395, float %394
-  %temp113.1 = select i1 %402, float %397, float %395
-  %403 = fmul float %.224, 2.000000e+00
+  %295 = extractelement <4 x float> %283, i32 0
+  %296 = extractelement <4 x float> %283, i32 1
+  %297 = extractelement <4 x float> %283, i32 2
+  %298 = fsub float -0.000000e+00, %289
+  %299 = fadd float %temp144.0, %298
+  %300 = fdiv float 1.000000e+00, %299
+  %301 = fmul float %289, %300
+  %302 = fadd float %301, -1.000000e+00
+  %303 = fmul float %302, %274
+  %304 = fadd float %303, %285
+  %305 = fmul float %302, %275
+  %306 = fadd float %305, %286
+  %307 = fsub float -0.000000e+00, %176
+  %308 = fadd float %304, %307
+  %309 = fsub float -0.000000e+00, %177
+  %310 = fadd float %306, %309
+  %311 = fadd float %176, %308
+  %312 = fadd float %177, %310
+  %313 = fmul float %308, %67
+  %314 = fmul float %310, %68
+  %315 = fmul float %313, %55
+  %316 = fmul float %313, %56
+  %317 = fmul float %314, %57
+  %318 = fadd float %317, %315
+  %319 = fmul float %314, %58
+  %320 = fadd float %319, %316
+  %321 = fadd float %178, %318
+  %322 = fadd float %179, %320
+  %323 = fmul float %313, %59
+  %324 = fmul float %313, %60
+  %325 = fmul float %313, %61
+  %326 = fmul float %313, %62
+  %327 = fmul float %314, %63
+  %328 = fadd float %327, %323
+  %329 = fmul float %314, %64
+  %330 = fadd float %329, %324
+  %331 = fmul float %314, %65
+  %332 = fadd float %331, %325
+  %333 = fmul float %314, %66
+  %334 = fadd float %333, %326
+  %335 = fadd float %168, %328
+  %336 = fadd float %169, %330
+  %337 = fadd float %170, %332
+  %338 = fadd float %171, %334
+  %339 = bitcast float %335 to i32
+  %340 = bitcast float %336 to i32
+  %341 = insertelement <2 x i32> undef, i32 %339, i32 0
+  %342 = insertelement <2 x i32> %341, i32 %340, i32 1
+  %343 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %342, <32 x i8> %135, <16 x i8> %137, i32 2)
+  %344 = extractelement <4 x float> %343, i32 0
+  %345 = extractelement <4 x float> %343, i32 1
+  %346 = extractelement <4 x float> %343, i32 2
+  %347 = extractelement <4 x float> %343, i32 3
+  %348 = fmul float %344, %23
+  %349 = fmul float %345, %24
+  %350 = fmul float %346, %25
+  %351 = fmul float %347, %26
+  %352 = fmul float %348, %180
+  %353 = fmul float %349, %181
+  %354 = fmul float %350, %182
+  %355 = fmul float %351, %183
+  %356 = fsub float -0.000000e+00, %347
+  %357 = fadd float 1.000000e+00, %356
+  %358 = fmul float %357, %49
+  %one.sub.a.i37 = fsub float 1.000000e+00, %358
+  %one.sub.ac.i38 = fmul float %one.sub.a.i37, %352
+  %mul.i39 = fmul float %344, %352
+  %result.i40 = fadd float %mul.i39, %one.sub.ac.i38
+  %one.sub.a.i33 = fsub float 1.000000e+00, %358
+  %one.sub.ac.i34 = fmul float %one.sub.a.i33, %353
+  %mul.i35 = fmul float %345, %353
+  %result.i36 = fadd float %mul.i35, %one.sub.ac.i34
+  %one.sub.a.i29 = fsub float 1.000000e+00, %358
+  %one.sub.ac.i30 = fmul float %one.sub.a.i29, %354
+  %mul.i31 = fmul float %346, %354
+  %result.i32 = fadd float %mul.i31, %one.sub.ac.i30
+  %359 = bitcast float %337 to i32
+  %360 = bitcast float %338 to i32
+  %361 = insertelement <2 x i32> undef, i32 %359, i32 0
+  %362 = insertelement <2 x i32> %361, i32 %360, i32 1
+  %363 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %362, <32 x i8> %151, <16 x i8> %153, i32 2)
+  %364 = extractelement <4 x float> %363, i32 2
+  %365 = fmul float %result.i40, %result.i
+  %366 = fmul float %result.i36, %result.i44
+  %367 = fmul float %result.i32, %result.i42
+  %368 = fmul float %355, %230
+  %369 = bitcast float %311 to i32
+  %370 = bitcast float %312 to i32
+  %371 = insertelement <2 x i32> undef, i32 %369, i32 0
+  %372 = insertelement <2 x i32> %371, i32 %370, i32 1
+  %373 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %372, <32 x i8> %139, <16 x i8> %141, i32 2)
+  %374 = extractelement <4 x float> %373, i32 0
+  %375 = extractelement <4 x float> %373, i32 1
+  %376 = extractelement <4 x float> %373, i32 2
+  %377 = extractelement <4 x float> %373, i32 3
+  %378 = fcmp olt float 0.000000e+00, %376
+  %379 = sext i1 %378 to i32
+  %380 = bitcast i32 %379 to float
+  %381 = bitcast float %380 to i32
+  %382 = icmp ne i32 %381, 0
+  %.224 = select i1 %382, float %375, float %374
+  %.225 = select i1 %382, float %377, float %375
+  %383 = bitcast float %321 to i32
+  %384 = bitcast float %322 to i32
+  %385 = insertelement <2 x i32> undef, i32 %383, i32 0
+  %386 = insertelement <2 x i32> %385, i32 %384, i32 1
+  %387 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %386, <32 x i8> %143, <16 x i8> %145, i32 2)
+  %388 = extractelement <4 x float> %387, i32 0
+  %389 = extractelement <4 x float> %387, i32 1
+  %390 = extractelement <4 x float> %387, i32 2
+  %391 = extractelement <4 x float> %387, i32 3
+  %392 = fcmp olt float 0.000000e+00, %390
+  %393 = sext i1 %392 to i32
+  %394 = bitcast i32 %393 to float
+  %395 = bitcast float %394 to i32
+  %396 = icmp ne i32 %395, 0
+  %temp112.1 = select i1 %396, float %389, float %388
+  %temp113.1 = select i1 %396, float %391, float %389
+  %397 = fmul float %.224, 2.000000e+00
+  %398 = fadd float %397, -1.000000e+00
+  %399 = fmul float %.225, 2.000000e+00
+  %400 = fadd float %399, -1.000000e+00
+  %401 = fmul float %temp112.1, 2.000000e+00
+  %402 = fadd float %401, -1.000000e+00
+  %403 = fmul float %temp113.1, 2.000000e+00
   %404 = fadd float %403, -1.000000e+00
-  %405 = fmul float %.225, 2.000000e+00
-  %406 = fadd float %405, -1.000000e+00
-  %407 = fmul float %temp112.1, 2.000000e+00
-  %408 = fadd float %407, -1.000000e+00
-  %409 = fmul float %temp113.1, 2.000000e+00
-  %410 = fadd float %409, -1.000000e+00
+  %405 = fsub float -0.000000e+00, %398
+  %406 = fmul float %405, %35
+  %407 = fsub float -0.000000e+00, %400
+  %408 = fmul float %407, %35
+  %409 = fsub float -0.000000e+00, %402
+  %410 = fmul float %409, %36
   %411 = fsub float -0.000000e+00, %404
-  %412 = fmul float %411, %35
-  %413 = fsub float -0.000000e+00, %406
-  %414 = fmul float %413, %35
-  %415 = fsub float -0.000000e+00, %408
-  %416 = fmul float %415, %36
-  %417 = fsub float -0.000000e+00, %410
-  %418 = fmul float %417, %36
-  %419 = fmul float %416, %370
-  %420 = fmul float %418, %370
-  %421 = call float @fabs(float %412)
-  %422 = call float @fabs(float %414)
-  %423 = fsub float -0.000000e+00, %421
-  %424 = fadd float 1.000000e+00, %423
-  %425 = fsub float -0.000000e+00, %422
-  %426 = fadd float 1.000000e+00, %425
-  %427 = fmul float %424, %419
-  %428 = fadd float %427, %412
-  %429 = fmul float %426, %420
-  %430 = fadd float %429, %414
-  %431 = fmul float %428, %428
-  %432 = fmul float %430, %430
-  %433 = fadd float %431, %432
-  %434 = fsub float -0.000000e+00, %433
-  %435 = fadd float 0x3FF00068E0000000, %434
-  %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00)
-  %437 = call float @llvm.amdgcn.rsq.f32(float %436)
-  %438 = fmul float %437, %436
-  %439 = fsub float -0.000000e+00, %436
-  %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00)
-  %441 = fmul float %184, %428
-  %442 = fmul float %185, %428
-  %443 = fmul float %186, %428
-  %444 = fmul float %187, %430
-  %445 = fadd float %444, %441
-  %446 = fmul float %188, %430
-  %447 = fadd float %446, %442
-  %448 = fmul float %189, %430
+  %412 = fmul float %411, %36
+  %413 = fmul float %410, %364
+  %414 = fmul float %412, %364
+  %415 = call float @fabs(float %406)
+  %416 = call float @fabs(float %408)
+  %417 = fsub float -0.000000e+00, %415
+  %418 = fadd float 1.000000e+00, %417
+  %419 = fsub float -0.000000e+00, %416
+  %420 = fadd float 1.000000e+00, %419
+  %421 = fmul float %418, %413
+  %422 = fadd float %421, %406
+  %423 = fmul float %420, %414
+  %424 = fadd float %423, %408
+  %425 = fmul float %422, %422
+  %426 = fmul float %424, %424
+  %427 = fadd float %425, %426
+  %428 = fsub float -0.000000e+00, %427
+  %429 = fadd float 0x3FF00068E0000000, %428
+  %430 = call float @llvm.AMDIL.clamp.(float %429, float 0.000000e+00, float 1.000000e+00)
+  %431 = call float @llvm.amdgcn.rsq.f32(float %430)
+  %432 = fmul float %431, %430
+  %433 = fsub float -0.000000e+00, %430
+  %cmp = fcmp ogt float 0.000000e+00, %433
+  %434 = select i1 %cmp, float %432, float 0.000000e+00
+  %435 = fmul float %184, %422
+  %436 = fmul float %185, %422
+  %437 = fmul float %186, %422
+  %438 = fmul float %187, %424
+  %439 = fadd float %438, %435
+  %440 = fmul float %188, %424
+  %441 = fadd float %440, %436
+  %442 = fmul float %189, %424
+  %443 = fadd float %442, %437
+  %444 = fmul float %190, %434
+  %445 = fadd float %444, %439
+  %446 = fmul float %191, %434
+  %447 = fadd float %446, %441
+  %448 = fmul float %192, %434
   %449 = fadd float %448, %443
-  %450 = fmul float %190, %440
-  %451 = fadd float %450, %445
-  %452 = fmul float %191, %440
-  %453 = fadd float %452, %447
-  %454 = fmul float %192, %440
-  %455 = fadd float %454, %449
-  %456 = fmul float %451, %451
-  %457 = fmul float %453, %453
-  %458 = fadd float %457, %456
-  %459 = fmul float %455, %455
-  %460 = fadd float %458, %459
-  %461 = call float @llvm.amdgcn.rsq.f32(float %460)
-  %462 = fmul float %451, %461
-  %463 = fmul float %453, %461
-  %464 = fmul float %455, %461
-  %465 = fcmp olt float 0.000000e+00, %219
-  %466 = sext i1 %465 to i32
-  %467 = bitcast i32 %466 to float
-  %468 = bitcast float %467 to i32
-  %469 = icmp ne i32 %468, 0
-  br i1 %469, label %IF198, label %ENDIF197
+  %450 = fmul float %445, %445
+  %451 = fmul float %447, %447
+  %452 = fadd float %451, %450
+  %453 = fmul float %449, %449
+  %454 = fadd float %452, %453
+  %455 = call float @llvm.amdgcn.rsq.f32(float %454)
+  %456 = fmul float %445, %455
+  %457 = fmul float %447, %455
+  %458 = fmul float %449, %455
+  %459 = fcmp olt float 0.000000e+00, %219
+  %460 = sext i1 %459 to i32
+  %461 = bitcast i32 %460 to float
+  %462 = bitcast float %461 to i32
+  %463 = icmp ne i32 %462, 0
+  br i1 %463, label %IF198, label %ENDIF197
 
 IF198:                                            ; preds = %IF189
-  %470 = fsub float -0.000000e+00, %462
-  %471 = fsub float -0.000000e+00, %463
-  %472 = fsub float -0.000000e+00, %464
+  %464 = fsub float -0.000000e+00, %456
+  %465 = fsub float -0.000000e+00, %457
+  %466 = fsub float -0.000000e+00, %458
   br label %ENDIF197
 
-ENDIF197:                                         ; preds = %IF189, %IF198
-  %temp14.0 = phi float [ %472, %IF198 ], [ %464, %IF189 ]
-  %temp13.0 = phi float [ %471, %IF198 ], [ %463, %IF189 ]
-  %temp12.0 = phi float [ %470, %IF198 ], [ %462, %IF189 ]
-  %473 = bitcast float %220 to i32
-  %474 = bitcast float %221 to i32
-  %475 = insertelement <2 x i32> undef, i32 %473, i32 0
-  %476 = insertelement <2 x i32> %475, i32 %474, i32 1
-  %477 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %476, <32 x i8> %159, <16 x i8> %161, i32 2)
-  %478 = extractelement <4 x float> %477, i32 0
-  %479 = extractelement <4 x float> %477, i32 1
-  %480 = extractelement <4 x float> %477, i32 2
-  %481 = extractelement <4 x float> %477, i32 3
-  %482 = fmul float %478, %40
-  %483 = fadd float %482, %41
-  %484 = fmul float %479, %40
-  %485 = fadd float %484, %41
-  %486 = fmul float %480, %40
-  %487 = fadd float %486, %41
-  %488 = fmul float %481, %42
-  %489 = fadd float %488, %43
-  %490 = bitcast float %172 to i32
-  %491 = bitcast float %173 to i32
-  %492 = insertelement <2 x i32> undef, i32 %490, i32 0
-  %493 = insertelement <2 x i32> %492, i32 %491, i32 1
-  %494 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %493, <32 x i8> %155, <16 x i8> %157, i32 2)
-  %495 = extractelement <4 x float> %494, i32 0
-  %496 = extractelement <4 x float> %494, i32 1
-  %497 = extractelement <4 x float> %494, i32 2
-  %498 = extractelement <4 x float> %494, i32 3
-  %499 = fmul float %498, 3.200000e+01
-  %500 = fadd float %499, -1.600000e+01
-  %501 = call float @llvm.AMDIL.exp.(float %500)
-  %502 = fmul float %495, %501
-  %503 = fmul float %496, %501
-  %504 = fmul float %497, %501
-  %505 = fmul float %28, %502
-  %506 = fadd float %505, %193
-  %507 = fmul float %29, %503
-  %508 = fadd float %507, %194
-  %509 = fmul float %30, %504
-  %510 = fadd float %509, %195
-  %511 = fmul float %506, %489
-  %512 = fmul float %508, %489
-  %513 = fmul float %510, %489
-  %514 = fmul float %489, 5.000000e-01
-  %515 = fadd float %514, 5.000000e-01
-  %516 = fmul float %483, %515
-  %517 = fadd float %516, %511
-  %518 = fmul float %485, %515
-  %519 = fadd float %518, %512
-  %520 = fmul float %487, %515
-  %521 = fadd float %520, %513
-  %522 = fmul float %517, %371
-  %523 = fmul float %519, %372
-  %524 = fmul float %521, %373
-  %525 = fmul float %428, 0x3FDB272440000000
-  %526 = fmul float %430, 0xBFDB272440000000
-  %527 = fadd float %526, %525
-  %528 = fmul float %440, 0x3FE99999A0000000
-  %529 = fadd float %527, %528
-  %530 = fmul float %529, 5.000000e-01
-  %531 = fadd float %530, 0x3FE3333340000000
-  %532 = fmul float %531, %531
-  %533 = fmul float %522, %532
-  %534 = fmul float %523, %532
-  %535 = fmul float %524, %532
-  %536 = fsub float -0.000000e+00, %72
-  %537 = fsub float -0.000000e+00, %73
-  %538 = fsub float -0.000000e+00, %74
-  %539 = fmul float %temp12.0, %536
-  %540 = fmul float %temp13.0, %537
-  %541 = fadd float %540, %539
-  %542 = fmul float %temp14.0, %538
-  %543 = fadd float %541, %542
-  %544 = call float @llvm.AMDIL.clamp.(float %543, float 0.000000e+00, float 1.000000e+00)
-  %545 = fmul float %371, %544
-  %546 = fmul float %372, %544
-  %547 = fmul float %373, %544
-  %548 = fmul float %545, %69
-  %549 = fmul float %546, %70
-  %550 = fmul float %547, %71
-  %551 = fsub float -0.000000e+00, %164
-  %552 = fadd float %97, %551
-  %553 = fsub float -0.000000e+00, %165
-  %554 = fadd float %98, %553
-  %555 = fsub float -0.000000e+00, %166
-  %556 = fadd float %99, %555
-  %557 = fmul float %552, %552
-  %558 = fmul float %554, %554
-  %559 = fadd float %558, %557
-  %560 = fmul float %556, %556
+ENDIF197:                                         ; preds = %IF198, %IF189
+  %temp14.0 = phi float [ %466, %IF198 ], [ %458, %IF189 ]
+  %temp13.0 = phi float [ %465, %IF198 ], [ %457, %IF189 ]
+  %temp12.0 = phi float [ %464, %IF198 ], [ %456, %IF189 ]
+  %467 = bitcast float %220 to i32
+  %468 = bitcast float %221 to i32
+  %469 = insertelement <2 x i32> undef, i32 %467, i32 0
+  %470 = insertelement <2 x i32> %469, i32 %468, i32 1
+  %471 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %470, <32 x i8> %159, <16 x i8> %161, i32 2)
+  %472 = extractelement <4 x float> %471, i32 0
+  %473 = extractelement <4 x float> %471, i32 1
+  %474 = extractelement <4 x float> %471, i32 2
+  %475 = extractelement <4 x float> %471, i32 3
+  %476 = fmul float %472, %40
+  %477 = fadd float %476, %41
+  %478 = fmul float %473, %40
+  %479 = fadd float %478, %41
+  %480 = fmul float %474, %40
+  %481 = fadd float %480, %41
+  %482 = fmul float %475, %42
+  %483 = fadd float %482, %43
+  %484 = bitcast float %172 to i32
+  %485 = bitcast float %173 to i32
+  %486 = insertelement <2 x i32> undef, i32 %484, i32 0
+  %487 = insertelement <2 x i32> %486, i32 %485, i32 1
+  %488 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %487, <32 x i8> %155, <16 x i8> %157, i32 2)
+  %489 = extractelement <4 x float> %488, i32 0
+  %490 = extractelement <4 x float> %488, i32 1
+  %491 = extractelement <4 x float> %488, i32 2
+  %492 = extractelement <4 x float> %488, i32 3
+  %493 = fmul float %492, 3.200000e+01
+  %494 = fadd float %493, -1.600000e+01
+  %495 = call float @llvm.AMDIL.exp.(float %494)
+  %496 = fmul float %489, %495
+  %497 = fmul float %490, %495
+  %498 = fmul float %491, %495
+  %499 = fmul float %28, %496
+  %500 = fadd float %499, %193
+  %501 = fmul float %29, %497
+  %502 = fadd float %501, %194
+  %503 = fmul float %30, %498
+  %504 = fadd float %503, %195
+  %505 = fmul float %500, %483
+  %506 = fmul float %502, %483
+  %507 = fmul float %504, %483
+  %508 = fmul float %483, 5.000000e-01
+  %509 = fadd float %508, 5.000000e-01
+  %510 = fmul float %477, %509
+  %511 = fadd float %510, %505
+  %512 = fmul float %479, %509
+  %513 = fadd float %512, %506
+  %514 = fmul float %481, %509
+  %515 = fadd float %514, %507
+  %516 = fmul float %511, %365
+  %517 = fmul float %513, %366
+  %518 = fmul float %515, %367
+  %519 = fmul float %422, 0x3FDB272440000000
+  %520 = fmul float %424, 0xBFDB272440000000
+  %521 = fadd float %520, %519
+  %522 = fmul float %434, 0x3FE99999A0000000
+  %523 = fadd float %521, %522
+  %524 = fmul float %523, 5.000000e-01
+  %525 = fadd float %524, 0x3FE3333340000000
+  %526 = fmul float %525, %525
+  %527 = fmul float %516, %526
+  %528 = fmul float %517, %526
+  %529 = fmul float %518, %526
+  %530 = fsub float -0.000000e+00, %72
+  %531 = fsub float -0.000000e+00, %73
+  %532 = fsub float -0.000000e+00, %74
+  %533 = fmul float %temp12.0, %530
+  %534 = fmul float %temp13.0, %531
+  %535 = fadd float %534, %533
+  %536 = fmul float %temp14.0, %532
+  %537 = fadd float %535, %536
+  %538 = call float @llvm.AMDIL.clamp.(float %537, float 0.000000e+00, float 1.000000e+00)
+  %539 = fmul float %365, %538
+  %540 = fmul float %366, %538
+  %541 = fmul float %367, %538
+  %542 = fmul float %539, %69
+  %543 = fmul float %540, %70
+  %544 = fmul float %541, %71
+  %545 = fsub float -0.000000e+00, %164
+  %546 = fadd float %97, %545
+  %547 = fsub float -0.000000e+00, %165
+  %548 = fadd float %98, %547
+  %549 = fsub float -0.000000e+00, %166
+  %550 = fadd float %99, %549
+  %551 = fmul float %546, %546
+  %552 = fmul float %548, %548
+  %553 = fadd float %552, %551
+  %554 = fmul float %550, %550
+  %555 = fadd float %553, %554
+  %556 = call float @llvm.amdgcn.rsq.f32(float %555)
+  %557 = fmul float %556, %555
+  %558 = fsub float -0.000000e+00, %555
+  %cmp1 = fcmp ogt float %558, 0.000000e+00
+  %559 = select i1 %cmp1, float %557, float 0.000000e+00
+  %560 = fsub float -0.000000e+00, %84
   %561 = fadd float %559, %560
-  %562 = call float @llvm.amdgcn.rsq.f32(float %561)
-  %563 = fmul float %562, %561
-  %564 = fsub float -0.000000e+00, %561
-  %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00)
+  %562 = fsub float -0.000000e+00, %83
+  %563 = fadd float %559, %562
+  %564 = fsub float -0.000000e+00, %82
+  %565 = fadd float %559, %564
   %566 = fsub float -0.000000e+00, %84
-  %567 = fadd float %565, %566
+  %567 = fadd float %83, %566
   %568 = fsub float -0.000000e+00, %83
-  %569 = fadd float %565, %568
+  %569 = fadd float %82, %568
   %570 = fsub float -0.000000e+00, %82
-  %571 = fadd float %565, %570
-  %572 = fsub float -0.000000e+00, %84
-  %573 = fadd float %83, %572
-  %574 = fsub float -0.000000e+00, %83
-  %575 = fadd float %82, %574
-  %576 = fsub float -0.000000e+00, %82
-  %577 = fadd float %81, %576
-  %578 = fdiv float 1.000000e+00, %573
-  %579 = fdiv float 1.000000e+00, %575
-  %580 = fdiv float 1.000000e+00, %577
-  %581 = fmul float %567, %578
-  %582 = fmul float %569, %579
-  %583 = fmul float %571, %580
-  %584 = fcmp olt float %565, %83
-  %585 = sext i1 %584 to i32
-  %586 = bitcast i32 %585 to float
-  %587 = bitcast float %586 to i32
-  %588 = icmp ne i32 %587, 0
-  br i1 %588, label %ENDIF200, label %ELSE202
+  %571 = fadd float %81, %570
+  %572 = fdiv float 1.000000e+00, %567
+  %573 = fdiv float 1.000000e+00, %569
+  %574 = fdiv float 1.000000e+00, %571
+  %575 = fmul float %561, %572
+  %576 = fmul float %563, %573
+  %577 = fmul float %565, %574
+  %578 = fcmp olt float %559, %83
+  %579 = sext i1 %578 to i32
+  %580 = bitcast i32 %579 to float
+  %581 = bitcast float %580 to i32
+  %582 = icmp ne i32 %581, 0
+  br i1 %582, label %ENDIF200, label %ELSE202
 
 ELSE202:                                          ; preds = %ENDIF197
-  %589 = fcmp olt float %565, %82
-  %590 = sext i1 %589 to i32
-  %591 = bitcast i32 %590 to float
-  %592 = bitcast float %591 to i32
-  %593 = icmp ne i32 %592, 0
-  br i1 %593, label %ENDIF200, label %ELSE205
+  %583 = fcmp olt float %559, %82
+  %584 = sext i1 %583 to i32
+  %585 = bitcast i32 %584 to float
+  %586 = bitcast float %585 to i32
+  %587 = icmp ne i32 %586, 0
+  br i1 %587, label %ENDIF200, label %ELSE205
 
 ENDIF200:                                         ; preds = %ELSE205, %ELSE202, %ENDIF197
-  %temp80.0 = phi float [ %581, %ENDIF197 ], [ %.226, %ELSE205 ], [ %582, %ELSE202 ]
+  %temp80.0 = phi float [ %575, %ENDIF197 ], [ %.226, %ELSE205 ], [ %576, %ELSE202 ]
   %temp88.0 = phi float [ %122, %ENDIF197 ], [ %.227, %ELSE205 ], [ %120, %ELSE202 ]
   %temp89.0 = phi float [ %123, %ENDIF197 ], [ %.228, %ELSE205 ], [ %121, %ELSE202 ]
   %temp90.0 = phi float [ %120, %ENDIF197 ], [ %116, %ELSE205 ], [ %118, %ELSE202 ]
   %temp91.0 = phi float [ %121, %ENDIF197 ], [ %117, %ELSE205 ], [ %119, %ELSE202 ]
-  %594 = fcmp olt float %565, %83
-  %595 = sext i1 %594 to i32
-  %596 = bitcast i32 %595 to float
-  %597 = bitcast float %596 to i32
-  %598 = icmp ne i32 %597, 0
-  br i1 %598, label %ENDIF209, label %ELSE211
+  %588 = fcmp olt float %559, %83
+  %589 = sext i1 %588 to i32
+  %590 = bitcast i32 %589 to float
+  %591 = bitcast float %590 to i32
+  %592 = icmp ne i32 %591, 0
+  br i1 %592, label %ENDIF209, label %ELSE211
 
 ELSE205:                                          ; preds = %ELSE202
-  %599 = fcmp olt float %565, %81
-  %600 = sext i1 %599 to i32
-  %601 = bitcast i32 %600 to float
-  %602 = bitcast float %601 to i32
-  %603 = icmp ne i32 %602, 0
-  %.226 = select i1 %603, float %583, float 1.000000e+00
-  %.227 = select i1 %603, float %118, float %116
-  %.228 = select i1 %603, float %119, float %117
+  %593 = fcmp olt float %559, %81
+  %594 = sext i1 %593 to i32
+  %595 = bitcast i32 %594 to float
+  %596 = bitcast float %595 to i32
+  %597 = icmp ne i32 %596, 0
+  %.226 = select i1 %597, float %577, float 1.000000e+00
+  %.227 = select i1 %597, float %118, float %116
+  %.228 = select i1 %597, float %119, float %117
   br label %ENDIF200
 
 ELSE211:                                          ; preds = %ENDIF200
-  %604 = fcmp olt float %565, %82
-  %605 = sext i1 %604 to i32
-  %606 = bitcast i32 %605 to float
-  %607 = bitcast float %606 to i32
-  %608 = icmp ne i32 %607, 0
-  br i1 %608, label %ENDIF209, label %ELSE214
+  %598 = fcmp olt float %559, %82
+  %599 = sext i1 %598 to i32
+  %600 = bitcast i32 %599 to float
+  %601 = bitcast float %600 to i32
+  %602 = icmp ne i32 %601, 0
+  br i1 %602, label %ENDIF209, label %ELSE214
 
 ENDIF209:                                         ; preds = %ELSE214, %ELSE211, %ENDIF200
   %temp52.0 = phi float [ %108, %ENDIF200 ], [ %100, %ELSE214 ], [ %104, %ELSE211 ]
@@ -1347,232 +1318,286 @@ ENDIF209:
   %temp69.0 = phi float [ %113, %ENDIF200 ], [ %.231, %ELSE214 ], [ %109, %ELSE211 ]
   %temp70.0 = phi float [ %114, %ENDIF200 ], [ %.232, %ELSE214 ], [ %110, %ELSE211 ]
   %temp71.0 = phi float [ %115, %ENDIF200 ], [ %.233, %ELSE214 ], [ %111, %ELSE211 ]
-  %609 = fmul float %164, %85
-  %610 = fmul float %165, %86
-  %611 = fadd float %609, %610
-  %612 = fmul float %166, %87
-  %613 = fadd float %611, %612
-  %614 = fmul float %167, %88
-  %615 = fadd float %613, %614
-  %616 = fmul float %164, %89
-  %617 = fmul float %165, %90
-  %618 = fadd float %616, %617
-  %619 = fmul float %166, %91
-  %620 = fadd float %618, %619
-  %621 = fmul float %167, %92
-  %622 = fadd float %620, %621
-  %623 = fmul float %164, %93
-  %624 = fmul float %165, %94
-  %625 = fadd float %623, %624
-  %626 = fmul float %166, %95
-  %627 = fadd float %625, %626
-  %628 = fmul float %167, %96
-  %629 = fadd float %627, %628
-  %630 = fsub float -0.000000e+00, %78
-  %631 = fadd float 1.000000e+00, %630
-  %632 = call float @fabs(float %615)
-  %633 = call float @fabs(float %622)
-  %634 = fcmp oge float %631, %632
+  %603 = fmul float %164, %85
+  %604 = fmul float %165, %86
+  %605 = fadd float %603, %604
+  %606 = fmul float %166, %87
+  %607 = fadd float %605, %606
+  %608 = fmul float %167, %88
+  %609 = fadd float %607, %608
+  %610 = fmul float %164, %89
+  %611 = fmul float %165, %90
+  %612 = fadd float %610, %611
+  %613 = fmul float %166, %91
+  %614 = fadd float %612, %613
+  %615 = fmul float %167, %92
+  %616 = fadd float %614, %615
+  %617 = fmul float %164, %93
+  %618 = fmul float %165, %94
+  %619 = fadd float %617, %618
+  %620 = fmul float %166, %95
+  %621 = fadd float %619, %620
+  %622 = fmul float %167, %96
+  %623 = fadd float %621, %622
+  %624 = fsub float -0.000000e+00, %78
+  %625 = fadd float 1.000000e+00, %624
+  %626 = call float @fabs(float %609)
+  %627 = call float @fabs(float %616)
+  %628 = fcmp oge float %625, %626
+  %629 = sext i1 %628 to i32
+  %630 = bitcast i32 %629 to float
+  %631 = bitcast float %630 to i32
+  %632 = and i32 %631, 1065353216
+  %633 = bitcast i32 %632 to float
+  %634 = fcmp oge float %625, %627
   %635 = sext i1 %634 to i32
   %636 = bitcast i32 %635 to float
   %637 = bitcast float %636 to i32
   %638 = and i32 %637, 1065353216
   %639 = bitcast i32 %638 to float
-  %640 = fcmp oge float %631, %633
-  %641 = sext i1 %640 to i32
-  %642 = bitcast i32 %641 to float
-  %643 = bitcast float %642 to i32
-  %644 = and i32 %643, 1065353216
-  %645 = bitcast i32 %644 to float
-  %646 = fmul float %639, %645
-  %647 = fmul float %629, %646
-  %648 = fmul float %615, %temp68.0
-  %649 = fadd float %648, %temp70.0
-  %650 = fmul float %622, %temp69.0
-  %651 = fadd float %650, %temp71.0
-  %652 = fmul float %615, %temp52.0
-  %653 = fadd float %652, %temp54.0
-  %654 = fmul float %622, %temp53.0
-  %655 = fadd float %654, %temp55.0
-  %656 = fadd float %temp80.0, -1.000000e+00
-  %657 = fmul float %656, %77
-  %658 = fadd float %657, 1.000000e+00
-  %659 = call float @llvm.AMDIL.clamp.(float %658, float 0.000000e+00, float 1.000000e+00)
-  %660 = bitcast float %649 to i32
-  %661 = bitcast float %651 to i32
-  %662 = bitcast float 0.000000e+00 to i32
-  %663 = insertelement <4 x i32> undef, i32 %660, i32 0
-  %664 = insertelement <4 x i32> %663, i32 %661, i32 1
-  %665 = insertelement <4 x i32> %664, i32 %662, i32 2
-  %666 = insertelement <4 x i32> %665, i32 undef, i32 3
-  %667 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %666, <32 x i8> %127, <16 x i8> %129, i32 2)
-  %668 = extractelement <4 x float> %667, i32 0
-  %669 = extractelement <4 x float> %667, i32 1
-  %670 = bitcast float %653 to i32
-  %671 = bitcast float %655 to i32
-  %672 = bitcast float 0.000000e+00 to i32
-  %673 = insertelement <4 x i32> undef, i32 %670, i32 0
-  %674 = insertelement <4 x i32> %673, i32 %671, i32 1
-  %675 = insertelement <4 x i32> %674, i32 %672, i32 2
-  %676 = insertelement <4 x i32> %675, i32 undef, i32 3
-  %677 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %676, <32 x i8> %127, <16 x i8> %129, i32 2)
-  %678 = extractelement <4 x float> %677, i32 0
-  %679 = extractelement <4 x float> %677, i32 1
-  %680 = fsub float -0.000000e+00, %669
-  %681 = fadd float 1.000000e+00, %680
+  %640 = fmul float %633, %639
+  %641 = fmul float %623, %640
+  %642 = fmul float %609, %temp68.0
+  %643 = fadd float %642, %temp70.0
+  %644 = fmul float %616, %temp69.0
+  %645 = fadd float %644, %temp71.0
+  %646 = fmul float %609, %temp52.0
+  %647 = fadd float %646, %temp54.0
+  %648 = fmul float %616, %temp53.0
+  %649 = fadd float %648, %temp55.0
+  %650 = fadd float %temp80.0, -1.000000e+00
+  %651 = fmul float %650, %77
+  %652 = fadd float %651, 1.000000e+00
+  %653 = call float @llvm.AMDIL.clamp.(float %652, float 0.000000e+00, float 1.000000e+00)
+  %654 = bitcast float %643 to i32
+  %655 = bitcast float %645 to i32
+  %656 = bitcast float 0.000000e+00 to i32
+  %657 = insertelement <4 x i32> undef, i32 %654, i32 0
+  %658 = insertelement <4 x i32> %657, i32 %655, i32 1
+  %659 = insertelement <4 x i32> %658, i32 %656, i32 2
+  %660 = insertelement <4 x i32> %659, i32 undef, i32 3
+  %661 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %660, <32 x i8> %127, <16 x i8> %129, i32 2)
+  %662 = extractelement <4 x float> %661, i32 0
+  %663 = extractelement <4 x float> %661, i32 1
+  %664 = bitcast float %647 to i32
+  %665 = bitcast float %649 to i32
+  %666 = bitcast float 0.000000e+00 to i32
+  %667 = insertelement <4 x i32> undef, i32 %664, i32 0
+  %668 = insertelement <4 x i32> %667, i32 %665, i32 1
+  %669 = insertelement <4 x i32> %668, i32 %666, i32 2
+  %670 = insertelement <4 x i32> %669, i32 undef, i32 3
+  %671 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %670, <32 x i8> %127, <16 x i8> %129, i32 2)
+  %672 = extractelement <4 x float> %671, i32 0
+  %673 = extractelement <4 x float> %671, i32 1
+  %674 = fsub float -0.000000e+00, %663
+  %675 = fadd float 1.000000e+00, %674
+  %676 = fsub float -0.000000e+00, %673
+  %677 = fadd float 1.000000e+00, %676
+  %678 = fmul float %675, 2.500000e-01
+  %679 = fmul float %677, 2.500000e-01
+  %680 = fsub float -0.000000e+00, %678
+  %681 = fadd float %662, %680
   %682 = fsub float -0.000000e+00, %679
-  %683 = fadd float 1.000000e+00, %682
-  %684 = fmul float %681, 2.500000e-01
-  %685 = fmul float %683, 2.500000e-01
-  %686 = fsub float -0.000000e+00, %684
-  %687 = fadd float %668, %686
-  %688 = fsub float -0.000000e+00, %685
-  %689 = fadd float %678, %688
-  %690 = fmul float %647, %temp88.0
-  %691 = fadd float %690, %temp89.0
-  %692 = fmul float %647, %temp90.0
-  %693 = fadd float %692, %temp91.0
-  %694 = call float @llvm.AMDIL.clamp.(float %691, float 0.000000e+00, float 1.000000e+00)
-  %695 = call float @llvm.AMDIL.clamp.(float %693, float 0.000000e+00, float 1.000000e+00)
+  %683 = fadd float %672, %682
+  %684 = fmul float %641, %temp88.0
+  %685 = fadd float %684, %temp89.0
+  %686 = fmul float %641, %temp90.0
+  %687 = fadd float %686, %temp91.0
+  %688 = call float @llvm.AMDIL.clamp.(float %685, float 0.000000e+00, float 1.000000e+00)
+  %689 = call float @llvm.AMDIL.clamp.(float %687, float 0.000000e+00, float 1.000000e+00)
+  %690 = fsub float -0.000000e+00, %688
+  %691 = fadd float %662, %690
+  %692 = fsub float -0.000000e+00, %689
+  %693 = fadd float %672, %692
+  %694 = fmul float %662, %662
+  %695 = fmul float %672, %672
   %696 = fsub float -0.000000e+00, %694
-  %697 = fadd float %668, %696
+  %697 = fadd float %681, %696
   %698 = fsub float -0.000000e+00, %695
-  %699 = fadd float %678, %698
-  %700 = fmul float %668, %668
-  %701 = fmul float %678, %678
-  %702 = fsub float -0.000000e+00, %700
-  %703 = fadd float %687, %702
-  %704 = fsub float -0.000000e+00, %701
-  %705 = fadd float %689, %704
-  %706 = fcmp uge float %703, %75
-  %707 = select i1 %706, float %703, float %75
-  %708 = fcmp uge float %705, %75
-  %709 = select i1 %708, float %705, float %75
-  %710 = fmul float %697, %697
-  %711 = fadd float %710, %707
-  %712 = fmul float %699, %699
-  %713 = fadd float %712, %709
-  %714 = fdiv float 1.000000e+00, %711
-  %715 = fdiv float 1.000000e+00, %713
-  %716 = fmul float %707, %714
-  %717 = fmul float %709, %715
-  %718 = fcmp oge float %697, 0.000000e+00
-  %719 = sext i1 %718 to i32
-  %720 = bitcast i32 %719 to float
-  %721 = bitcast float %720 to i32
-  %722 = icmp ne i32 %721, 0
-  %.229 = select i1 %722, float 1.000000e+00, float %716
-  %723 = fcmp oge float %699, 0.000000e+00
-  %724 = sext i1 %723 to i32
-  %725 = bitcast i32 %724 to float
-  %726 = bitcast float %725 to i32
-  %727 = icmp ne i32 %726, 0
-  %temp28.0 = select i1 %727, float 1.000000e+00, float %717
-  %728 = call float @llvm.AMDGPU.lrp(float %659, float %temp28.0, float %.229)
-  %729 = call float @llvm.pow.f32(float %728, float %76)
-  %730 = fmul float %729, %79
-  %731 = fadd float %730, %80
-  %732 = call float @llvm.AMDIL.clamp.(float %731, float 0.000000e+00, float 1.000000e+00)
-  %733 = fmul float %732, %732
-  %734 = fmul float 2.000000e+00, %732
-  %735 = fsub float -0.000000e+00, %734
-  %736 = fadd float 3.000000e+00, %735
-  %737 = fmul float %733, %736
-  %738 = fmul float %548, %737
-  %739 = fmul float %549, %737
-  %740 = fmul float %550, %737
-  %741 = fmul float %738, %515
-  %742 = fadd float %741, %533
-  %743 = fmul float %739, %515
-  %744 = fadd float %743, %534
-  %745 = fmul float %740, %515
-  %746 = fadd float %745, %535
-  %747 = call float @llvm.AMDGPU.lrp(float %230, float %287, float 1.000000e+00)
-  %748 = call float @llvm.AMDGPU.lrp(float %37, float %298, float 1.000000e+00)
-  %749 = call float @llvm.AMDGPU.lrp(float %37, float %299, float 1.000000e+00)
-  %750 = call float @llvm.AMDGPU.lrp(float %37, float %300, float 1.000000e+00)
-  %751 = call float @llvm.AMDGPU.lrp(float %38, float %747, float 1.000000e+00)
-  %752 = fmul float %748, %751
-  %753 = fmul float %749, %751
-  %754 = fmul float %750, %751
-  %755 = fmul float %742, %752
-  %756 = fmul float %744, %753
-  %757 = fmul float %746, %754
-  %758 = fmul float %temp12.0, %216
-  %759 = fmul float %temp13.0, %217
-  %760 = fadd float %759, %758
-  %761 = fmul float %temp14.0, %218
-  %762 = fadd float %760, %761
-  %763 = call float @fabs(float %762)
-  %764 = fmul float %763, %763
-  %765 = fmul float %764, %50
-  %766 = fadd float %765, %51
-  %767 = call float @llvm.AMDIL.clamp.(float %766, float 0.000000e+00, float 1.000000e+00)
-  %768 = fsub float -0.000000e+00, %767
-  %769 = fadd float 1.000000e+00, %768
-  %770 = fmul float %33, %769
-  %771 = fmul float %33, %769
-  %772 = fmul float %33, %769
-  %773 = fmul float %34, %769
-  %774 = call float @llvm.AMDGPU.lrp(float %770, float %31, float %755)
-  %775 = call float @llvm.AMDGPU.lrp(float %771, float %31, float %756)
-  %776 = call float @llvm.AMDGPU.lrp(float %772, float %31, float %757)
-  %777 = call float @llvm.AMDGPU.lrp(float %773, float %32, float %374)
-  %778 = fcmp uge float %774, 0x3E6FFFFE60000000
-  %779 = select i1 %778, float %774, float 0x3E6FFFFE60000000
-  %780 = fcmp uge float %775, 0x3E6FFFFE60000000
-  %781 = select i1 %780, float %775, float 0x3E6FFFFE60000000
-  %782 = fcmp uge float %776, 0x3E6FFFFE60000000
-  %783 = select i1 %782, float %776, float 0x3E6FFFFE60000000
-  %784 = fcmp uge float %779, 6.550400e+04
-  %785 = select i1 %784, float 6.550400e+04, float %779
-  %786 = fcmp uge float %781, 6.550400e+04
-  %787 = select i1 %786, float 6.550400e+04, float %781
-  %788 = fcmp uge float %783, 6.550400e+04
-  %789 = select i1 %788, float 6.550400e+04, float %783
-  %790 = fmul float %777, %52
-  %791 = fadd float %790, %53
-  %792 = call float @llvm.AMDIL.clamp.(float %791, float 0.000000e+00, float 1.000000e+00)
-  %793 = call i32 @llvm.SI.packf16(float %785, float %787)
-  %794 = bitcast i32 %793 to float
-  %795 = call i32 @llvm.SI.packf16(float %789, float %792)
-  %796 = bitcast i32 %795 to float
-  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %794, float %796, float %794, float %796)
+  %699 = fadd float %683, %698
+  %700 = fcmp uge float %697, %75
+  %701 = select i1 %700, float %697, float %75
+  %702 = fcmp uge float %699, %75
+  %703 = select i1 %702, float %699, float %75
+  %704 = fmul float %691, %691
+  %705 = fadd float %704, %701
+  %706 = fmul float %693, %693
+  %707 = fadd float %706, %703
+  %708 = fdiv float 1.000000e+00, %705
+  %709 = fdiv float 1.000000e+00, %707
+  %710 = fmul float %701, %708
+  %711 = fmul float %703, %709
+  %712 = fcmp oge float %691, 0.000000e+00
+  %713 = sext i1 %712 to i32
+  %714 = bitcast i32 %713 to float
+  %715 = bitcast float %714 to i32
+  %716 = icmp ne i32 %715, 0
+  %.229 = select i1 %716, float 1.000000e+00, float %710
+  %717 = fcmp oge float %693, 0.000000e+00
+  %718 = sext i1 %717 to i32
+  %719 = bitcast i32 %718 to float
+  %720 = bitcast float %719 to i32
+  %721 = icmp ne i32 %720, 0
+  %temp28.0 = select i1 %721, float 1.000000e+00, float %711
+  %one.sub.a.i25 = fsub float 1.000000e+00, %653
+  %one.sub.ac.i26 = fmul float %one.sub.a.i25, %.229
+  %mul.i27 = fmul float %temp28.0, %.229
+  %result.i28 = fadd float %mul.i27, %one.sub.ac.i26
+  %722 = call float @llvm.pow.f32(float %result.i28, float %76)
+  %723 = fmul float %722, %79
+  %724 = fadd float %723, %80
+  %725 = call float @llvm.AMDIL.clamp.(float %724, float 0.000000e+00, float 1.000000e+00)
+  %726 = fmul float %725, %725
+  %727 = fmul float 2.000000e+00, %725
+  %728 = fsub float -0.000000e+00, %727
+  %729 = fadd float 3.000000e+00, %728
+  %730 = fmul float %726, %729
+  %731 = fmul float %542, %730
+  %732 = fmul float %543, %730
+  %733 = fmul float %544, %730
+  %734 = fmul float %731, %509
+  %735 = fadd float %734, %527
+  %736 = fmul float %732, %509
+  %737 = fadd float %736, %528
+  %738 = fmul float %733, %509
+  %739 = fadd float %738, %529
+  %one.sub.a.i23 = fsub float 1.000000e+00, %230
+  %result.i24 = fadd float %284, %one.sub.a.i23
+  %one.sub.a.i21 = fsub float 1.000000e+00, %37
+  %result.i22 = fadd float %295, %one.sub.a.i21
+  %one.sub.a.i19 = fsub float 1.000000e+00, %37
+  %result.i20 = fadd float %296, %one.sub.a.i19
+  %one.sub.a.i17 = fsub float 1.000000e+00, %37
+  %result.i18 = fadd float %297, %one.sub.a.i17
+  %one.sub.a.i15 = fsub float 1.000000e+00, %38
+  %result.i16 = fadd float %result.i24, %one.sub.a.i15
+  %740 = fmul float %result.i22, %result.i16
+  %741 = fmul float %result.i20, %result.i16
+  %742 = fmul float %result.i18, %result.i16
+  %743 = fmul float %735, %740
+  %744 = fmul float %737, %741
+  %745 = fmul float %739, %742
+  %746 = fmul float %temp12.0, %216
+  %747 = fmul float %temp13.0, %217
+  %748 = fadd float %747, %746
+  %749 = fmul float %temp14.0, %218
+  %750 = fadd float %748, %749
+  %751 = call float @fabs(float %750)
+  %752 = fmul float %751, %751
+  %753 = fmul float %752, %50
+  %754 = fadd float %753, %51
+  %755 = call float @llvm.AMDIL.clamp.(float %754, float 0.000000e+00, float 1.000000e+00)
+  %756 = fsub float -0.000000e+00, %755
+  %757 = fadd float 1.000000e+00, %756
+  %758 = fmul float %33, %757
+  %759 = fmul float %33, %757
+  %760 = fmul float %33, %757
+  %761 = fmul float %34, %757
+  %one.sub.a.i11 = fsub float 1.000000e+00, %758
+  %one.sub.ac.i12 = fmul float %one.sub.a.i11, %743
+  %mul.i13 = fmul float %31, %743
+  %result.i14 = fadd float %mul.i13, %one.sub.ac.i12
+  %one.sub.a.i7 = fsub float 1.000000e+00, %759
+  %one.sub.ac.i8 = fmul float %one.sub.a.i7, %744
+  %mul.i9 = fmul float %31, %744
+  %result.i10 = fadd float %mul.i9, %one.sub.ac.i8
+  %one.sub.a.i3 = fsub float 1.000000e+00, %760
+  %one.sub.ac.i4 = fmul float %one.sub.a.i3, %745
+  %mul.i5 = fmul float %31, %745
+  %result.i6 = fadd float %mul.i5, %one.sub.ac.i4
+  %one.sub.a.i1 = fsub float 1.000000e+00, %761
+  %one.sub.ac.i = fmul float %one.sub.a.i1, %368
+  %mul.i = fmul float %32, %368
+  %result.i2 = fadd float %mul.i, %one.sub.ac.i
+  %762 = fcmp uge float %result.i14, 0x3E6FFFFE60000000
+  %763 = select i1 %762, float %result.i14, float 0x3E6FFFFE60000000
+  %764 = fcmp uge float %result.i10, 0x3E6FFFFE60000000
+  %765 = select i1 %764, float %result.i10, float 0x3E6FFFFE60000000
+  %766 = fcmp uge float %result.i6, 0x3E6FFFFE60000000
+  %767 = select i1 %766, float %result.i6, float 0x3E6FFFFE60000000
+  %768 = fcmp uge float %763, 6.550400e+04
+  %769 = select i1 %768, float 6.550400e+04, float %763
+  %770 = fcmp uge float %765, 6.550400e+04
+  %771 = select i1 %770, float 6.550400e+04, float %765
+  %772 = fcmp uge float %767, 6.550400e+04
+  %773 = select i1 %772, float 6.550400e+04, float %767
+  %774 = fmul float %result.i2, %52
+  %775 = fadd float %774, %53
+  %776 = call float @llvm.AMDIL.clamp.(float %775, float 0.000000e+00, float 1.000000e+00)
+  %777 = call i32 @llvm.SI.packf16(float %769, float %771)
+  %778 = bitcast i32 %777 to float
+  %779 = call i32 @llvm.SI.packf16(float %773, float %776)
+  %780 = bitcast i32 %779 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %778, float %780, float %778, float %780)
   ret void
 
 ELSE214:                                          ; preds = %ELSE211
-  %797 = fcmp olt float %565, %81
-  %798 = sext i1 %797 to i32
-  %799 = bitcast i32 %798 to float
-  %800 = bitcast float %799 to i32
-  %801 = icmp ne i32 %800, 0
-  %.230 = select i1 %801, float %104, float %100
-  %.231 = select i1 %801, float %105, float %101
-  %.232 = select i1 %801, float %106, float %102
-  %.233 = select i1 %801, float %107, float %103
+  %781 = fcmp olt float %559, %81
+  %782 = sext i1 %781 to i32
+  %783 = bitcast i32 %782 to float
+  %784 = bitcast float %783 to i32
+  %785 = icmp ne i32 %784, 0
+  %.230 = select i1 %785, float %104, float %100
+  %.231 = select i1 %785, float %105, float %101
+  %.232 = select i1 %785, float %106, float %102
+  %.233 = select i1 %785, float %107, float %103
   br label %ENDIF209
 }
 
 ; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #2
+declare float @llvm.AMDIL.clamp.(float, float, float) #3
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #2
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.lrp(float, float, float) #2
+declare float @llvm.AMDIL.exp.(float) #3
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #2
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #2
 
 ; Function Attrs: readnone
-declare float @llvm.AMDGPU.cndlt(float, float, float) #2
+declare i32 @llvm.SI.tid() #3
+
+; Function Attrs: nounwind readonly
+declare float @ceil(float) #4
+
+; Function Attrs: nounwind readnone
+declare float @llvm.amdgcn.rsq.f32(float) #2
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #2
+
+; Function Attrs: readnone
+declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #3
 
 ; Function Attrs: readnone
-declare float @llvm.AMDIL.exp.(float) #2
+declare float @fabs(float) #3
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare float @llvm.pow.f32(float, float) #2
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #2
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+
+
+attributes #0 = { alwaysinline nounwind readnone }
+attributes #1 = { "ShaderType"="0" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { readnone }
+attributes #4 = { nounwind readonly }
 
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
-attributes #3 = { nounwind readonly }
-attributes #4 = { readonly }
+!0 = !{!1, !1, i64 0, i32 1}
+!1 = !{!"const", null}

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll?rev=258612&r1=258611&r2=258612&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll Fri Jan 22 23:42:38 2016
@@ -3,10 +3,10 @@
 
 ; If this occurs it is likely due to reordering and the restore was
 ; originally supposed to happen before SI_END_CF.
+
 ; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
 ; SI-NOT: v_readlane_b32 [[SAVED]]
-
-define void @main() #0 {
+define void @main() #1 {
 main_body:
   %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
   %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
@@ -84,180 +84,182 @@ LOOP:
   br i1 %67, label %ENDLOOP, label %ENDIF
 
 ENDLOOP:                                          ; preds = %ELSE2566, %LOOP
-  %68 = call float @llvm.AMDGPU.lrp(float %0, float undef, float undef)
-  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %68, float undef, float 1.000000e+00)
+  %one.sub.a.i = fsub float 1.000000e+00, %0
+  %one.sub.ac.i = fmul float %one.sub.a.i, undef
+  %result.i = fadd float fmul (float undef, float undef), %one.sub.ac.i
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %result.i, float undef, float 1.000000e+00)
   ret void
 
 ENDIF:                                            ; preds = %LOOP
-  %69 = fsub float %2, undef
-  %70 = fsub float %3, undef
-  %71 = fsub float %4, undef
-  %72 = fmul float %69, 0.000000e+00
+  %68 = fsub float %2, undef
+  %69 = fsub float %3, undef
+  %70 = fsub float %4, undef
+  %71 = fmul float %68, 0.000000e+00
+  %72 = fmul float %69, undef
   %73 = fmul float %70, undef
-  %74 = fmul float %71, undef
-  %75 = fsub float %6, undef
-  %76 = fsub float %7, undef
-  %77 = fmul float %75, undef
-  %78 = fmul float %76, 0.000000e+00
-  %79 = call float @llvm.minnum.f32(float %74, float %78)
-  %80 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00)
-  %81 = call float @llvm.maxnum.f32(float %73, float %77)
-  %82 = call float @llvm.maxnum.f32(float undef, float %79)
-  %83 = call float @llvm.minnum.f32(float %80, float %81)
-  %84 = call float @llvm.minnum.f32(float %83, float undef)
-  %85 = fsub float %14, undef
-  %86 = fsub float %15, undef
-  %87 = fsub float %16, undef
+  %74 = fsub float %6, undef
+  %75 = fsub float %7, undef
+  %76 = fmul float %74, undef
+  %77 = fmul float %75, 0.000000e+00
+  %78 = call float @llvm.minnum.f32(float %73, float %77)
+  %79 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00)
+  %80 = call float @llvm.maxnum.f32(float %72, float %76)
+  %81 = call float @llvm.maxnum.f32(float undef, float %78)
+  %82 = call float @llvm.minnum.f32(float %79, float %80)
+  %83 = call float @llvm.minnum.f32(float %82, float undef)
+  %84 = fsub float %14, undef
+  %85 = fsub float %15, undef
+  %86 = fsub float %16, undef
+  %87 = fmul float %84, undef
   %88 = fmul float %85, undef
   %89 = fmul float %86, undef
-  %90 = fmul float %87, undef
-  %91 = fsub float %17, undef
-  %92 = fsub float %18, undef
-  %93 = fsub float %19, undef
-  %94 = fmul float %91, 0.000000e+00
+  %90 = fsub float %17, undef
+  %91 = fsub float %18, undef
+  %92 = fsub float %19, undef
+  %93 = fmul float %90, 0.000000e+00
+  %94 = fmul float %91, undef
   %95 = fmul float %92, undef
-  %96 = fmul float %93, undef
-  %97 = call float @llvm.minnum.f32(float %89, float %95)
-  %98 = call float @llvm.maxnum.f32(float %88, float %94)
-  %99 = call float @llvm.maxnum.f32(float %90, float %96)
-  %100 = call float @llvm.maxnum.f32(float undef, float %97)
-  %101 = call float @llvm.maxnum.f32(float %100, float undef)
-  %102 = call float @llvm.minnum.f32(float %98, float undef)
-  %103 = call float @llvm.minnum.f32(float %102, float %99)
-  %104 = fsub float %30, undef
-  %105 = fsub float %31, undef
+  %96 = call float @llvm.minnum.f32(float %88, float %94)
+  %97 = call float @llvm.maxnum.f32(float %87, float %93)
+  %98 = call float @llvm.maxnum.f32(float %89, float %95)
+  %99 = call float @llvm.maxnum.f32(float undef, float %96)
+  %100 = call float @llvm.maxnum.f32(float %99, float undef)
+  %101 = call float @llvm.minnum.f32(float %97, float undef)
+  %102 = call float @llvm.minnum.f32(float %101, float %98)
+  %103 = fsub float %30, undef
+  %104 = fsub float %31, undef
+  %105 = fmul float %103, 0.000000e+00
   %106 = fmul float %104, 0.000000e+00
-  %107 = fmul float %105, 0.000000e+00
-  %108 = call float @llvm.minnum.f32(float undef, float %106)
+  %107 = call float @llvm.minnum.f32(float undef, float %105)
+  %108 = call float @llvm.maxnum.f32(float undef, float %106)
   %109 = call float @llvm.maxnum.f32(float undef, float %107)
-  %110 = call float @llvm.maxnum.f32(float undef, float %108)
-  %111 = call float @llvm.maxnum.f32(float %110, float undef)
-  %112 = call float @llvm.minnum.f32(float undef, float %109)
-  %113 = fsub float %32, undef
-  %114 = fsub float %33, undef
-  %115 = fsub float %34, undef
-  %116 = fmul float %113, 0.000000e+00
+  %110 = call float @llvm.maxnum.f32(float %109, float undef)
+  %111 = call float @llvm.minnum.f32(float undef, float %108)
+  %112 = fsub float %32, undef
+  %113 = fsub float %33, undef
+  %114 = fsub float %34, undef
+  %115 = fmul float %112, 0.000000e+00
+  %116 = fmul float %113, undef
   %117 = fmul float %114, undef
-  %118 = fmul float %115, undef
-  %119 = fsub float %35, undef
-  %120 = fsub float %36, undef
-  %121 = fsub float %37, undef
+  %118 = fsub float %35, undef
+  %119 = fsub float %36, undef
+  %120 = fsub float %37, undef
+  %121 = fmul float %118, undef
   %122 = fmul float %119, undef
   %123 = fmul float %120, undef
-  %124 = fmul float %121, undef
+  %124 = call float @llvm.minnum.f32(float %115, float %121)
   %125 = call float @llvm.minnum.f32(float %116, float %122)
   %126 = call float @llvm.minnum.f32(float %117, float %123)
-  %127 = call float @llvm.minnum.f32(float %118, float %124)
-  %128 = call float @llvm.maxnum.f32(float %125, float %126)
-  %129 = call float @llvm.maxnum.f32(float %128, float %127)
-  %130 = fsub float %38, undef
-  %131 = fsub float %39, undef
-  %132 = fsub float %40, undef
-  %133 = fmul float %130, 0.000000e+00
+  %127 = call float @llvm.maxnum.f32(float %124, float %125)
+  %128 = call float @llvm.maxnum.f32(float %127, float %126)
+  %129 = fsub float %38, undef
+  %130 = fsub float %39, undef
+  %131 = fsub float %40, undef
+  %132 = fmul float %129, 0.000000e+00
+  %133 = fmul float %130, undef
   %134 = fmul float %131, undef
-  %135 = fmul float %132, undef
-  %136 = fsub float %41, undef
-  %137 = fsub float %42, undef
-  %138 = fsub float %43, undef
+  %135 = fsub float %41, undef
+  %136 = fsub float %42, undef
+  %137 = fsub float %43, undef
+  %138 = fmul float %135, undef
   %139 = fmul float %136, undef
   %140 = fmul float %137, undef
-  %141 = fmul float %138, undef
+  %141 = call float @llvm.minnum.f32(float %132, float %138)
   %142 = call float @llvm.minnum.f32(float %133, float %139)
   %143 = call float @llvm.minnum.f32(float %134, float %140)
-  %144 = call float @llvm.minnum.f32(float %135, float %141)
-  %145 = call float @llvm.maxnum.f32(float %142, float %143)
-  %146 = call float @llvm.maxnum.f32(float %145, float %144)
-  %147 = fsub float %44, undef
-  %148 = fsub float %45, undef
-  %149 = fsub float %46, undef
+  %144 = call float @llvm.maxnum.f32(float %141, float %142)
+  %145 = call float @llvm.maxnum.f32(float %144, float %143)
+  %146 = fsub float %44, undef
+  %147 = fsub float %45, undef
+  %148 = fsub float %46, undef
+  %149 = fmul float %146, 0.000000e+00
   %150 = fmul float %147, 0.000000e+00
-  %151 = fmul float %148, 0.000000e+00
-  %152 = fmul float %149, undef
-  %153 = fsub float %47, undef
-  %154 = fsub float %48, undef
-  %155 = fsub float %49, undef
-  %156 = fmul float %153, undef
-  %157 = fmul float %154, 0.000000e+00
-  %158 = fmul float %155, undef
+  %151 = fmul float %148, undef
+  %152 = fsub float %47, undef
+  %153 = fsub float %48, undef
+  %154 = fsub float %49, undef
+  %155 = fmul float %152, undef
+  %156 = fmul float %153, 0.000000e+00
+  %157 = fmul float %154, undef
+  %158 = call float @llvm.minnum.f32(float %149, float %155)
   %159 = call float @llvm.minnum.f32(float %150, float %156)
   %160 = call float @llvm.minnum.f32(float %151, float %157)
-  %161 = call float @llvm.minnum.f32(float %152, float %158)
-  %162 = call float @llvm.maxnum.f32(float %159, float %160)
-  %163 = call float @llvm.maxnum.f32(float %162, float %161)
-  %164 = fsub float %50, undef
-  %165 = fsub float %51, undef
-  %166 = fsub float %52, undef
-  %167 = fmul float %164, undef
+  %161 = call float @llvm.maxnum.f32(float %158, float %159)
+  %162 = call float @llvm.maxnum.f32(float %161, float %160)
+  %163 = fsub float %50, undef
+  %164 = fsub float %51, undef
+  %165 = fsub float %52, undef
+  %166 = fmul float %163, undef
+  %167 = fmul float %164, 0.000000e+00
   %168 = fmul float %165, 0.000000e+00
-  %169 = fmul float %166, 0.000000e+00
-  %170 = fsub float %53, undef
-  %171 = fsub float %54, undef
-  %172 = fsub float %55, undef
-  %173 = fdiv float 1.000000e+00, %temp18.0
+  %169 = fsub float %53, undef
+  %170 = fsub float %54, undef
+  %171 = fsub float %55, undef
+  %172 = fdiv float 1.000000e+00, %temp18.0
+  %173 = fmul float %169, undef
   %174 = fmul float %170, undef
-  %175 = fmul float %171, undef
-  %176 = fmul float %172, %173
+  %175 = fmul float %171, %172
+  %176 = call float @llvm.minnum.f32(float %166, float %173)
   %177 = call float @llvm.minnum.f32(float %167, float %174)
   %178 = call float @llvm.minnum.f32(float %168, float %175)
-  %179 = call float @llvm.minnum.f32(float %169, float %176)
-  %180 = call float @llvm.maxnum.f32(float %177, float %178)
-  %181 = call float @llvm.maxnum.f32(float %180, float %179)
-  %182 = fsub float %62, undef
-  %183 = fsub float %63, undef
-  %184 = fsub float %64, undef
-  %185 = fmul float %182, 0.000000e+00
+  %179 = call float @llvm.maxnum.f32(float %176, float %177)
+  %180 = call float @llvm.maxnum.f32(float %179, float %178)
+  %181 = fsub float %62, undef
+  %182 = fsub float %63, undef
+  %183 = fsub float %64, undef
+  %184 = fmul float %181, 0.000000e+00
+  %185 = fmul float %182, undef
   %186 = fmul float %183, undef
-  %187 = fmul float %184, undef
-  %188 = fsub float %65, undef
-  %189 = fsub float %66, undef
+  %187 = fsub float %65, undef
+  %188 = fsub float %66, undef
+  %189 = fmul float %187, undef
   %190 = fmul float %188, undef
-  %191 = fmul float %189, undef
+  %191 = call float @llvm.maxnum.f32(float %184, float %189)
   %192 = call float @llvm.maxnum.f32(float %185, float %190)
-  %193 = call float @llvm.maxnum.f32(float %186, float %191)
-  %194 = call float @llvm.maxnum.f32(float %187, float undef)
-  %195 = call float @llvm.minnum.f32(float %192, float %193)
-  %196 = call float @llvm.minnum.f32(float %195, float %194)
-  %.temp292.7 = select i1 undef, float %163, float undef
-  %temp292.9 = select i1 false, float %181, float %.temp292.7
+  %193 = call float @llvm.maxnum.f32(float %186, float undef)
+  %194 = call float @llvm.minnum.f32(float %191, float %192)
+  %195 = call float @llvm.minnum.f32(float %194, float %193)
+  %.temp292.7 = select i1 undef, float %162, float undef
+  %temp292.9 = select i1 false, float %180, float %.temp292.7
   %.temp292.9 = select i1 undef, float undef, float %temp292.9
-  %197 = fcmp ogt float undef, 0.000000e+00
-  %198 = fcmp olt float undef, %196
-  %199 = and i1 %197, %198
-  %200 = fcmp olt float undef, %.temp292.9
-  %201 = and i1 %199, %200
-  %temp292.11 = select i1 %201, float undef, float %.temp292.9
+  %196 = fcmp ogt float undef, 0.000000e+00
+  %197 = fcmp olt float undef, %195
+  %198 = and i1 %196, %197
+  %199 = fcmp olt float undef, %.temp292.9
+  %200 = and i1 %198, %199
+  %temp292.11 = select i1 %200, float undef, float %.temp292.9
   br i1 undef, label %IF2565, label %ELSE2566
 
 IF2565:                                           ; preds = %ENDIF
   br i1 false, label %ENDIF2582, label %ELSE2584
 
 ELSE2566:                                         ; preds = %ENDIF
-  %202 = fcmp oeq float %temp292.11, 1.000000e+04
-  br i1 %202, label %ENDLOOP, label %ELSE2593
+  %201 = fcmp oeq float %temp292.11, 1.000000e+04
+  br i1 %201, label %ENDLOOP, label %ELSE2593
 
 ENDIF2564:                                        ; preds = %ENDIF2594, %ENDIF2588
   %temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
-  %temp18.1 = phi float [ %219, %ENDIF2588 ], [ undef, %ENDIF2594 ]
-  %203 = fsub float %5, undef
-  %204 = fmul float %203, undef
-  %205 = call float @llvm.maxnum.f32(float undef, float %204)
+  %temp18.1 = phi float [ %218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
+  %202 = fsub float %5, undef
+  %203 = fmul float %202, undef
+  %204 = call float @llvm.maxnum.f32(float undef, float %203)
+  %205 = call float @llvm.minnum.f32(float %204, float undef)
   %206 = call float @llvm.minnum.f32(float %205, float undef)
-  %207 = call float @llvm.minnum.f32(float %206, float undef)
-  %208 = fcmp ogt float undef, 0.000000e+00
-  %209 = fcmp olt float undef, 1.000000e+00
-  %210 = and i1 %208, %209
-  %211 = fcmp olt float undef, %207
-  %212 = and i1 %210, %211
-  br i1 %212, label %ENDIF2795, label %ELSE2797
+  %207 = fcmp ogt float undef, 0.000000e+00
+  %208 = fcmp olt float undef, 1.000000e+00
+  %209 = and i1 %207, %208
+  %210 = fcmp olt float undef, %206
+  %211 = and i1 %209, %210
+  br i1 %211, label %ENDIF2795, label %ELSE2797
 
 ELSE2584:                                         ; preds = %IF2565
   br label %ENDIF2582
 
 ENDIF2582:                                        ; preds = %ELSE2584, %IF2565
-  %213 = fadd float %1, undef
-  %214 = fadd float 0.000000e+00, %213
-  %floor = call float @llvm.floor.f32(float %214)
-  %215 = fsub float %214, %floor
+  %212 = fadd float %1, undef
+  %213 = fadd float 0.000000e+00, %212
+  %floor = call float @llvm.floor.f32(float %213)
+  %214 = fsub float %213, %floor
   br i1 undef, label %IF2589, label %ELSE2590
 
 IF2589:                                           ; preds = %ENDIF2582
@@ -267,61 +269,61 @@ ELSE2590:
   br label %ENDIF2588
 
 ENDIF2588:                                        ; preds = %ELSE2590, %IF2589
-  %216 = fsub float 1.000000e+00, %215
-  %217 = call float @llvm.sqrt.f32(float %216)
-  %218 = fmul float %217, undef
-  %219 = fadd float %218, undef
+  %215 = fsub float 1.000000e+00, %214
+  %216 = call float @llvm.sqrt.f32(float %215)
+  %217 = fmul float %216, undef
+  %218 = fadd float %217, undef
   br label %ENDIF2564
 
 ELSE2593:                                         ; preds = %ELSE2566
-  %220 = fcmp oeq float %temp292.11, %82
-  %221 = fcmp olt float %82, %84
-  %222 = and i1 %220, %221
-  br i1 %222, label %ENDIF2594, label %ELSE2596
+  %219 = fcmp oeq float %temp292.11, %81
+  %220 = fcmp olt float %81, %83
+  %221 = and i1 %219, %220
+  br i1 %221, label %ENDIF2594, label %ELSE2596
 
 ELSE2596:                                         ; preds = %ELSE2593
-  %223 = fcmp oeq float %temp292.11, %101
-  %224 = fcmp olt float %101, %103
-  %225 = and i1 %223, %224
-  br i1 %225, label %ENDIF2594, label %ELSE2632
+  %222 = fcmp oeq float %temp292.11, %100
+  %223 = fcmp olt float %100, %102
+  %224 = and i1 %222, %223
+  br i1 %224, label %ENDIF2594, label %ELSE2632
 
 ENDIF2594:                                        ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
   %temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
-  %226 = fmul float %temp894.2, undef
+  %225 = fmul float %temp894.2, undef
   br label %ENDIF2564
 
 ELSE2632:                                         ; preds = %ELSE2596
   br i1 undef, label %ENDIF2594, label %ELSE2650
 
 ELSE2650:                                         ; preds = %ELSE2632
-  %227 = fcmp oeq float %temp292.11, %111
-  %228 = fcmp olt float %111, %112
-  %229 = and i1 %227, %228
-  br i1 %229, label %IF2667, label %ELSE2668
+  %226 = fcmp oeq float %temp292.11, %110
+  %227 = fcmp olt float %110, %111
+  %228 = and i1 %226, %227
+  br i1 %228, label %IF2667, label %ELSE2668
 
 IF2667:                                           ; preds = %ELSE2650
   br i1 undef, label %ENDIF2594, label %ELSE2671
 
 ELSE2668:                                         ; preds = %ELSE2650
-  %230 = fcmp oeq float %temp292.11, %129
-  %231 = fcmp olt float %129, undef
-  %232 = and i1 %230, %231
-  br i1 %232, label %ENDIF2594, label %ELSE2686
+  %229 = fcmp oeq float %temp292.11, %128
+  %230 = fcmp olt float %128, undef
+  %231 = and i1 %229, %230
+  br i1 %231, label %ENDIF2594, label %ELSE2686
 
 ELSE2671:                                         ; preds = %IF2667
   br label %ENDIF2594
 
 ELSE2686:                                         ; preds = %ELSE2668
-  %233 = fcmp oeq float %temp292.11, %146
-  %234 = fcmp olt float %146, undef
-  %235 = and i1 %233, %234
-  br i1 %235, label %ENDIF2594, label %ELSE2704
+  %232 = fcmp oeq float %temp292.11, %145
+  %233 = fcmp olt float %145, undef
+  %234 = and i1 %232, %233
+  br i1 %234, label %ENDIF2594, label %ELSE2704
 
 ELSE2704:                                         ; preds = %ELSE2686
-  %236 = fcmp oeq float %temp292.11, %181
-  %237 = fcmp olt float %181, undef
-  %238 = and i1 %236, %237
-  br i1 %238, label %ENDIF2594, label %ELSE2740
+  %235 = fcmp oeq float %temp292.11, %180
+  %236 = fcmp olt float %180, undef
+  %237 = and i1 %235, %236
+  br i1 %237, label %ENDIF2594, label %ELSE2740
 
 ELSE2740:                                         ; preds = %ELSE2704
   br i1 undef, label %IF2757, label %ELSE2758
@@ -336,8 +338,8 @@ ELSE2761:
   br label %ENDIF2594
 
 IF2775:                                           ; preds = %ELSE2758
-  %239 = fcmp olt float undef, undef
-  br i1 %239, label %ENDIF2594, label %ELSE2779
+  %238 = fcmp olt float undef, undef
+  br i1 %238, label %ENDIF2594, label %ELSE2779
 
 ELSE2779:                                         ; preds = %IF2775
   br i1 undef, label %ENDIF2594, label %ELSE2782
@@ -346,39 +348,39 @@ ELSE2782:
   br i1 undef, label %ENDIF2594, label %ELSE2785
 
 ELSE2785:                                         ; preds = %ELSE2782
-  %240 = fcmp olt float undef, 0.000000e+00
-  br i1 %240, label %ENDIF2594, label %ELSE2788
+  %239 = fcmp olt float undef, 0.000000e+00
+  br i1 %239, label %ENDIF2594, label %ELSE2788
 
 ELSE2788:                                         ; preds = %ELSE2785
-  %241 = fcmp olt float 0.000000e+00, undef
-  %.2848 = select i1 %241, float -1.000000e+00, float 1.000000e+00
+  %240 = fcmp olt float 0.000000e+00, undef
+  %.2848 = select i1 %240, float -1.000000e+00, float 1.000000e+00
   br label %ENDIF2594
 
 ELSE2797:                                         ; preds = %ENDIF2564
-  %242 = fsub float %8, undef
-  %243 = fsub float %9, undef
-  %244 = fsub float %10, undef
+  %241 = fsub float %8, undef
+  %242 = fsub float %9, undef
+  %243 = fsub float %10, undef
+  %244 = fmul float %241, undef
   %245 = fmul float %242, undef
   %246 = fmul float %243, undef
-  %247 = fmul float %244, undef
-  %248 = fsub float %11, undef
-  %249 = fsub float %12, undef
-  %250 = fsub float %13, undef
+  %247 = fsub float %11, undef
+  %248 = fsub float %12, undef
+  %249 = fsub float %13, undef
+  %250 = fmul float %247, undef
   %251 = fmul float %248, undef
   %252 = fmul float %249, undef
-  %253 = fmul float %250, undef
+  %253 = call float @llvm.minnum.f32(float %244, float %250)
   %254 = call float @llvm.minnum.f32(float %245, float %251)
-  %255 = call float @llvm.minnum.f32(float %246, float %252)
-  %256 = call float @llvm.maxnum.f32(float %247, float %253)
-  %257 = call float @llvm.maxnum.f32(float %254, float %255)
-  %258 = call float @llvm.maxnum.f32(float %257, float undef)
-  %259 = call float @llvm.minnum.f32(float undef, float %256)
-  %260 = fcmp ogt float %258, 0.000000e+00
-  %261 = fcmp olt float %258, 1.000000e+00
-  %262 = and i1 %260, %261
-  %263 = fcmp olt float %258, %259
-  %264 = and i1 %262, %263
-  br i1 %264, label %ENDIF2795, label %ELSE2800
+  %255 = call float @llvm.maxnum.f32(float %246, float %252)
+  %256 = call float @llvm.maxnum.f32(float %253, float %254)
+  %257 = call float @llvm.maxnum.f32(float %256, float undef)
+  %258 = call float @llvm.minnum.f32(float undef, float %255)
+  %259 = fcmp ogt float %257, 0.000000e+00
+  %260 = fcmp olt float %257, 1.000000e+00
+  %261 = and i1 %259, %260
+  %262 = fcmp olt float %257, %258
+  %263 = and i1 %261, %262
+  br i1 %263, label %ENDIF2795, label %ELSE2800
 
 ENDIF2795:                                        ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
   br label %LOOP
@@ -387,53 +389,53 @@ ELSE2800:
   br i1 undef, label %ENDIF2795, label %ELSE2803
 
 ELSE2803:                                         ; preds = %ELSE2800
-  %265 = fsub float %20, undef
-  %266 = fsub float %21, undef
-  %267 = fsub float %22, undef
+  %264 = fsub float %20, undef
+  %265 = fsub float %21, undef
+  %266 = fsub float %22, undef
+  %267 = fmul float %264, undef
   %268 = fmul float %265, undef
-  %269 = fmul float %266, undef
-  %270 = fmul float %267, 0.000000e+00
-  %271 = fsub float %23, undef
-  %272 = fsub float %24, undef
-  %273 = fsub float %25, undef
+  %269 = fmul float %266, 0.000000e+00
+  %270 = fsub float %23, undef
+  %271 = fsub float %24, undef
+  %272 = fsub float %25, undef
+  %273 = fmul float %270, undef
   %274 = fmul float %271, undef
   %275 = fmul float %272, undef
-  %276 = fmul float %273, undef
-  %277 = call float @llvm.minnum.f32(float %268, float %274)
+  %276 = call float @llvm.minnum.f32(float %267, float %273)
+  %277 = call float @llvm.maxnum.f32(float %268, float %274)
   %278 = call float @llvm.maxnum.f32(float %269, float %275)
-  %279 = call float @llvm.maxnum.f32(float %270, float %276)
-  %280 = call float @llvm.maxnum.f32(float %277, float undef)
-  %281 = call float @llvm.maxnum.f32(float %280, float undef)
-  %282 = call float @llvm.minnum.f32(float undef, float %278)
-  %283 = call float @llvm.minnum.f32(float %282, float %279)
-  %284 = fcmp ogt float %281, 0.000000e+00
-  %285 = fcmp olt float %281, 1.000000e+00
-  %286 = and i1 %284, %285
-  %287 = fcmp olt float %281, %283
-  %288 = and i1 %286, %287
-  br i1 %288, label %ENDIF2795, label %ELSE2806
+  %279 = call float @llvm.maxnum.f32(float %276, float undef)
+  %280 = call float @llvm.maxnum.f32(float %279, float undef)
+  %281 = call float @llvm.minnum.f32(float undef, float %277)
+  %282 = call float @llvm.minnum.f32(float %281, float %278)
+  %283 = fcmp ogt float %280, 0.000000e+00
+  %284 = fcmp olt float %280, 1.000000e+00
+  %285 = and i1 %283, %284
+  %286 = fcmp olt float %280, %282
+  %287 = and i1 %285, %286
+  br i1 %287, label %ENDIF2795, label %ELSE2806
 
 ELSE2806:                                         ; preds = %ELSE2803
-  %289 = fsub float %26, undef
-  %290 = fsub float %27, undef
-  %291 = fsub float %28, undef
-  %292 = fmul float %289, undef
-  %293 = fmul float %290, 0.000000e+00
-  %294 = fmul float %291, undef
-  %295 = fsub float %29, undef
-  %296 = fmul float %295, undef
-  %297 = call float @llvm.minnum.f32(float %292, float %296)
-  %298 = call float @llvm.minnum.f32(float %293, float undef)
-  %299 = call float @llvm.maxnum.f32(float %294, float undef)
-  %300 = call float @llvm.maxnum.f32(float %297, float %298)
-  %301 = call float @llvm.maxnum.f32(float %300, float undef)
-  %302 = call float @llvm.minnum.f32(float undef, float %299)
-  %303 = fcmp ogt float %301, 0.000000e+00
-  %304 = fcmp olt float %301, 1.000000e+00
-  %305 = and i1 %303, %304
-  %306 = fcmp olt float %301, %302
-  %307 = and i1 %305, %306
-  br i1 %307, label %ENDIF2795, label %ELSE2809
+  %288 = fsub float %26, undef
+  %289 = fsub float %27, undef
+  %290 = fsub float %28, undef
+  %291 = fmul float %288, undef
+  %292 = fmul float %289, 0.000000e+00
+  %293 = fmul float %290, undef
+  %294 = fsub float %29, undef
+  %295 = fmul float %294, undef
+  %296 = call float @llvm.minnum.f32(float %291, float %295)
+  %297 = call float @llvm.minnum.f32(float %292, float undef)
+  %298 = call float @llvm.maxnum.f32(float %293, float undef)
+  %299 = call float @llvm.maxnum.f32(float %296, float %297)
+  %300 = call float @llvm.maxnum.f32(float %299, float undef)
+  %301 = call float @llvm.minnum.f32(float undef, float %298)
+  %302 = fcmp ogt float %300, 0.000000e+00
+  %303 = fcmp olt float %300, 1.000000e+00
+  %304 = and i1 %302, %303
+  %305 = fcmp olt float %300, %301
+  %306 = and i1 %304, %305
+  br i1 %306, label %ENDIF2795, label %ELSE2809
 
 ELSE2809:                                         ; preds = %ELSE2806
   br i1 undef, label %ENDIF2795, label %ELSE2812
@@ -448,29 +450,29 @@ ELSE2818:
   br i1 undef, label %ENDIF2795, label %ELSE2821
 
 ELSE2821:                                         ; preds = %ELSE2818
-  %308 = fsub float %56, undef
-  %309 = fsub float %57, undef
-  %310 = fsub float %58, undef
-  %311 = fmul float %308, undef
-  %312 = fmul float %309, 0.000000e+00
-  %313 = fmul float %310, undef
-  %314 = fsub float %59, undef
-  %315 = fsub float %60, undef
-  %316 = fsub float %61, undef
+  %307 = fsub float %56, undef
+  %308 = fsub float %57, undef
+  %309 = fsub float %58, undef
+  %310 = fmul float %307, undef
+  %311 = fmul float %308, 0.000000e+00
+  %312 = fmul float %309, undef
+  %313 = fsub float %59, undef
+  %314 = fsub float %60, undef
+  %315 = fsub float %61, undef
+  %316 = fmul float %313, undef
   %317 = fmul float %314, undef
   %318 = fmul float %315, undef
-  %319 = fmul float %316, undef
+  %319 = call float @llvm.maxnum.f32(float %310, float %316)
   %320 = call float @llvm.maxnum.f32(float %311, float %317)
   %321 = call float @llvm.maxnum.f32(float %312, float %318)
-  %322 = call float @llvm.maxnum.f32(float %313, float %319)
-  %323 = call float @llvm.minnum.f32(float %320, float %321)
-  %324 = call float @llvm.minnum.f32(float %323, float %322)
-  %325 = fcmp ogt float undef, 0.000000e+00
-  %326 = fcmp olt float undef, 1.000000e+00
-  %327 = and i1 %325, %326
-  %328 = fcmp olt float undef, %324
-  %329 = and i1 %327, %328
-  br i1 %329, label %ENDIF2795, label %ELSE2824
+  %322 = call float @llvm.minnum.f32(float %319, float %320)
+  %323 = call float @llvm.minnum.f32(float %322, float %321)
+  %324 = fcmp ogt float undef, 0.000000e+00
+  %325 = fcmp olt float undef, 1.000000e+00
+  %326 = and i1 %324, %325
+  %327 = fcmp olt float undef, %323
+  %328 = and i1 %326, %327
+  br i1 %328, label %ENDIF2795, label %ELSE2824
 
 ELSE2824:                                         ; preds = %ELSE2821
   %.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
@@ -478,25 +480,22 @@ ELSE2824:
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const(<16 x i8>, i32) #2
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.floor.f32(float) #1
+declare float @llvm.floor.f32(float) #2
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.sqrt.f32(float) #1
+declare float @llvm.sqrt.f32(float) #2
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.minnum.f32(float, float) #1
+declare float @llvm.minnum.f32(float, float) #2
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.maxnum.f32(float, float) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.lrp(float, float, float) #2
+declare float @llvm.maxnum.f32(float, float) #2
 
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
 
-attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { readnone }
+attributes #0 = { alwaysinline nounwind readnone }
+attributes #1 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
+attributes #2 = { nounwind readnone }