[llvm] r295359 - AMDGPU: Remove llvm.AMDGPU.cube intrinsic

Thu Feb 16 11:09:05 PST 2017

Author: arsenm
Date: Thu Feb 16 13:09:04 2017
New Revision: 295359

URL: http://llvm.org/viewvc/llvm-project?rev=295359&view=rev
Log:
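AMDGPU: Remove llvm.AMDGPU.cube intrinsic

R600 now selects CUBE from the new llvm.r600.cube intrinsic; the SI
selection pattern is removed, and SI code should use the separate
llvm.amdgcn.cubetc/cubesc/cubema/cubeid intrinsics instead.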

Added:
    llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.cube.ll
      - copied, changed from r295358, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll
Removed:
    llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll
Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
    llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/test/CodeGen/AMDGPU/cube.ll
    llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
    llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=295359&r1=295358&r2=295359&view=diff
==============================================================================

--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Thu Feb 16 13:09:04 2017
@@ -64,6 +64,10 @@ def int_r600_recipsqrt_clamped : Intrins
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
 >;
 
+def int_r600_cube : Intrinsic<
+  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]
+>;
+
 } // End TargetPrefix = "r600"
 
 let TargetPrefix = "amdgcn" in {

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td?rev=295359&r1=295358&r2=295359&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td Thu Feb 16 13:09:04 2017
@@ -20,9 +20,6 @@ let TargetPrefix = "AMDGPU", isTarget =
   // Deprecated in favor of llvm.amdgcn.sffbh
   def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
 
-  // Deprecated in favor of separate int_amdgcn_cube* intrinsics.
-  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-
   // Deprecated in favor of expanded bit operations
   def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=295359&r1=295358&r2=295359&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Thu Feb 16 13:09:04 2017
@@ -1013,7 +1013,7 @@ multiclass CUBE_Common <bits<11> inst> {
     (outs R600_Reg128:$dst),
     (ins R600_Reg128:$src0),
     "CUBE $dst $src0",
-    [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
+    [(set v4f32:$dst, (int_r600_cube v4f32:$src0))],
     VecALU
   > {
     let isPseudo = 1;

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=295359&r1=295358&r2=295359&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Thu Feb 16 13:09:04 2017
@@ -818,27 +818,6 @@ def : Pat <
 def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
 
 def : Pat <
-  (int_AMDGPU_cube v4f32:$src),
-  (REG_SEQUENCE VReg_128,
-    (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub0,
-    (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub1,
-    (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub2,
-    (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub3)
->;
-
-def : Pat <
   (i32 (sext i1:$src0)),
   (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
 >;

Modified: llvm/trunk/test/CodeGen/AMDGPU/cube.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cube.ll?rev=295359&r1=295358&r2=295359&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cube.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cube.ll Thu Feb 16 13:09:04 2017
@@ -6,9 +6,6 @@ declare float @llvm.amdgcn.cubesc(float,
 declare float @llvm.amdgcn.cubetc(float, float, float) #0
 declare float @llvm.amdgcn.cubema(float, float, float) #0
 
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
-
-
 ; GCN-LABEL: {{^}}cube:
 ; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -29,18 +26,5 @@ define void @cube(<4 x float> addrspace(
   ret void
 }
 
-; GCN-LABEL: {{^}}legacy_cube:
-; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN: _store_dwordx4
-define void @legacy_cube(<4 x float> addrspace(1)* %out, <4 x float> %abcx) #1 {
-  %cube = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %abcx)
-  store <4 x float> %cube, <4 x float> addrspace(1)* %out
-  ret void
-}
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-

Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll?rev=295358&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s
-
-; CHECK-LABEL: {{^}}cube:
-; CHECK: CUBE T{{[0-9]}}.X
-; CHECK: CUBE T{{[0-9]}}.Y
-; CHECK: CUBE T{{[0-9]}}.Z
-; CHECK: CUBE * T{{[0-9]}}.W
-define amdgpu_ps void @cube() {
-main_body:
-  %tmp = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %tmp1 = extractelement <4 x float> %tmp, i32 3
-  %tmp2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %tmp3 = extractelement <4 x float> %tmp2, i32 0
-  %tmp4 = fdiv float %tmp3, %tmp1
-  %tmp5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %tmp6 = extractelement <4 x float> %tmp5, i32 1
-  %tmp7 = fdiv float %tmp6, %tmp1
-  %tmp8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %tmp9 = extractelement <4 x float> %tmp8, i32 2
-  %tmp10 = fdiv float %tmp9, %tmp1
-  %tmp11 = insertelement <4 x float> undef, float %tmp4, i32 0
-  %tmp12 = insertelement <4 x float> %tmp11, float %tmp7, i32 1
-  %tmp13 = insertelement <4 x float> %tmp12, float %tmp10, i32 2
-  %tmp14 = insertelement <4 x float> %tmp13, float 1.000000e+00, i32 3
-  %tmp15 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp14)
-  %tmp16 = extractelement <4 x float> %tmp15, i32 0
-  %tmp17 = extractelement <4 x float> %tmp15, i32 1
-  %tmp18 = extractelement <4 x float> %tmp15, i32 2
-  %tmp19 = extractelement <4 x float> %tmp15, i32 3
-  %tmp20 = call float @llvm.fabs.f32(float %tmp18)
-  %tmp21 = fdiv float 1.000000e+00, %tmp20
-  %tmp22 = fmul float %tmp16, %tmp21
-  %tmp23 = fadd float %tmp22, 1.500000e+00
-  %tmp24 = fmul float %tmp17, %tmp21
-  %tmp25 = fadd float %tmp24, 1.500000e+00
-  %tmp26 = insertelement <4 x float> undef, float %tmp25, i32 0
-  %tmp27 = insertelement <4 x float> %tmp26, float %tmp23, i32 1
-  %tmp28 = insertelement <4 x float> %tmp27, float %tmp19, i32 2
-  %tmp29 = insertelement <4 x float> %tmp28, float %tmp25, i32 3
-  %tmp30 = shufflevector <4 x float> %tmp29, <4 x float> %tmp29, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %tmp31 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp30, i32 0, i32 0, i32 0, i32 16, i32 0, i32 1, i32 1, i32 1, i32 1)
-  call void @llvm.r600.store.swizzle(<4 x float> %tmp31, i32 0, i32 0)
-  ret void
-}
-
-; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.fabs.f32(float) #0
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }

Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.cube.ll (from r295358, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.cube.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.cube.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll&r1=295358&r2=295359&rev=295359&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.cube.ll Thu Feb 16 13:09:04 2017
@@ -22,7 +22,7 @@ main_body:
   %tmp12 = insertelement <4 x float> %tmp11, float %tmp7, i32 1
   %tmp13 = insertelement <4 x float> %tmp12, float %tmp10, i32 2
   %tmp14 = insertelement <4 x float> %tmp13, float 1.000000e+00, i32 3
-  %tmp15 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp14)
+  %tmp15 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp14)
   %tmp16 = extractelement <4 x float> %tmp15, i32 0
   %tmp17 = extractelement <4 x float> %tmp15, i32 1
   %tmp18 = extractelement <4 x float> %tmp15, i32 2
@@ -44,7 +44,7 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
+declare <4 x float> @llvm.r600.cube(<4 x float>) #0
 
 ; Function Attrs: nounwind readnone
 declare float @llvm.fabs.f32(float) #0

Modified: llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll?rev=295359&r1=295358&r2=295359&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll Thu Feb 16 13:09:04 2017
@@ -10,7 +10,7 @@ main_body:
   %tmp6 = insertelement <4 x float> %tmp5, float %tmp2, i32 1
   %tmp7 = insertelement <4 x float> %tmp6, float %tmp3, i32 2
   %tmp8 = insertelement <4 x float> %tmp7, float %tmp4, i32 3
-  %tmp9 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp8)
+  %tmp9 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp8)
   %tmp10 = extractelement <4 x float> %tmp9, i32 0
   %tmp11 = extractelement <4 x float> %tmp9, i32 1
   %tmp12 = extractelement <4 x float> %tmp9, i32 2
@@ -45,7 +45,7 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
+declare <4 x float> @llvm.r600.cube(<4 x float>) #0
 
 ; Function Attrs: readnone
 declare float @fabs(float) #0

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=295359&r1=295358&r2=295359&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Thu Feb 16 13:09:04 2017
@@ -586,7 +586,19 @@ IF67:
   %tmp449 = insertelement <4 x float> %tmp448, float %tmp445, i32 1
   %tmp450 = insertelement <4 x float> %tmp449, float %tmp447, i32 2
   %tmp451 = insertelement <4 x float> %tmp450, float %tmp194, i32 3
-  %tmp452 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp451)
+
+  %tmp451.x = extractelement <4 x float> %tmp451, i32 0
+  %tmp451.y = extractelement <4 x float> %tmp451, i32 1
+  %tmp451.z = extractelement <4 x float> %tmp451, i32 2
+  %cubetc = call float @llvm.amdgcn.cubetc(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %cubesc = call float @llvm.amdgcn.cubesc(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %cubema = call float @llvm.amdgcn.cubema(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %cubeid = call float @llvm.amdgcn.cubeid(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %tmp452.0 = insertelement <4 x float> undef, float %cubetc, i32 0
+  %tmp452.1 = insertelement <4 x float> %tmp452.0, float %cubesc, i32 1
+  %tmp452.2 = insertelement <4 x float> %tmp452.1, float %cubema, i32 2
+  %tmp452 = insertelement <4 x float> %tmp452.2, float %cubeid, i32 3
+
   %tmp453 = extractelement <4 x float> %tmp452, i32 0
   %tmp454 = extractelement <4 x float> %tmp452, i32 1
   %tmp455 = extractelement <4 x float> %tmp452, i32 2
@@ -1841,9 +1853,6 @@ declare float @llvm.amdgcn.rsq.f32(float
 declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
-
-; Function Attrs: nounwind readnone
 declare float @llvm.fabs.f32(float) #0
 
 ; Function Attrs: nounwind readnone
@@ -1863,6 +1872,11 @@ declare float @llvm.amdgcn.interp.p1(flo
 ; Function Attrs: nounwind readnone
 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
 
+declare float @llvm.amdgcn.cubeid(float, float, float) #0
+declare float @llvm.amdgcn.cubesc(float, float, float) #0
+declare float @llvm.amdgcn.cubetc(float, float, float) #0
+declare float @llvm.amdgcn.cubema(float, float, float) #0
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }