[llvm] r258557 - AMDGPU: Rename intrinsics to use amdgcn prefix
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 22 13:30:35 PST 2016
Author: arsenm
Date: Fri Jan 22 15:30:34 2016
New Revision: 258557
URL: http://llvm.org/viewvc/llvm-project?rev=258557&view=rev
Log:
AMDGPU: Rename intrinsics to use amdgcn prefix
The intrinsic target prefix should match the target name
as it appears in the triple.
This is not yet complete, but gets most of the important ones.
llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled
for compatibility for now.
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
- copied, changed from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll
llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll
llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll
- copied, changed from r258544, llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll
Removed:
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll
llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll
llvm/trunk/test/CodeGen/AMDGPU/pv.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Fri Jan 22 15:30:34 2016
@@ -11,6 +11,10 @@
//
//===----------------------------------------------------------------------===//
+class AMDGPUReadPreloadRegisterIntrinsic<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
let TargetPrefix = "r600" in {
class R600ReadPreloadRegisterIntrinsic<string name>
@@ -41,15 +45,30 @@ def int_r600_rat_store_typed :
Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
GCCBuiltin<"__builtin_r600_rat_store_typed">;
+def int_r600_rsq : Intrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+>;
+
+def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
+ "__builtin_r600_read_workdim"
+>;
+
} // End TargetPrefix = "r600"
+// FIXME: These should be renamed/moved to r600
let TargetPrefix = "AMDGPU" in {
+def int_AMDGPU_rsq_clamped : Intrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+>;
+
+def int_AMDGPU_ldexp : Intrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
+>;
+}
-class AMDGPUReadPreloadRegisterIntrinsic<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
+let TargetPrefix = "amdgcn" in {
-def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
+def int_amdgcn_div_scale : GCCBuiltin<"__builtin_amdgcn_div_scale">,
// 1st parameter: Numerator
// 2nd parameter: Denominator
// 3rd parameter: Constant to select select between first and
@@ -58,43 +77,39 @@ def int_AMDGPU_div_scale : GCCBuiltin<"_
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
[IntrNoMem]>;
-def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
+def int_amdgcn_div_fmas : GCCBuiltin<"__builtin_amdgcn_div_fmas">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
[IntrNoMem]>;
-def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
+def int_amdgcn_div_fixup : GCCBuiltin<"__builtin_amdgcn_div_fixup">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
-def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
+def int_amdgcn_trig_preop : GCCBuiltin<"__builtin_amdgcn_trig_preop">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem]>;
-def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
+def int_amdgcn_rcp : GCCBuiltin<"__builtin_amdgcn_rcp">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
+def int_amdgcn_rsq : GCCBuiltin<"__builtin_amdgcn_rsq">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
+def int_amdgcn_rsq_clamped : GCCBuiltin<"__builtin_amdgcn_rsq_clamped">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
+def int_amdgcn_ldexp : GCCBuiltin<"__builtin_amdgcn_ldexp">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
-def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
+def int_amdgcn_class : GCCBuiltin<"__builtin_amdgcn_class">,
Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
- "__builtin_amdgpu_read_workdim">;
-
-} // End TargetPrefix = "AMDGPU"
+def int_amdgcn_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
+ "__builtin_amdgcn_read_workdim">;
-let TargetPrefix = "amdgcn" in {
-// SI only
def int_amdgcn_buffer_wbinvl1_sc :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
Intrinsic<[], [], []>;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Jan 22 15:30:34 2016
@@ -925,7 +925,7 @@ SDValue AMDGPUTargetLowering::LowerINTRI
return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::AMDGPU_div_scale: {
+ case Intrinsic::amdgcn_div_scale: {
// 3rd parameter required to be a constant.
const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
if (!Param)
@@ -947,28 +947,29 @@ SDValue AMDGPUTargetLowering::LowerINTRI
Denominator, Numerator);
}
- case Intrinsic::AMDGPU_div_fmas:
+ case Intrinsic::amdgcn_div_fmas:
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
- case Intrinsic::AMDGPU_div_fixup:
+ case Intrinsic::amdgcn_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::AMDGPU_trig_preop:
+ case Intrinsic::amdgcn_trig_preop:
return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
Op.getOperand(1), Op.getOperand(2));
- case Intrinsic::AMDGPU_rcp:
+ case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
- case Intrinsic::AMDGPU_rsq:
+ case Intrinsic::amdgcn_rsq:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+ case Intrinsic::amdgcn_rsq_clamped:
case Intrinsic::AMDGPU_rsq_clamped:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Type *Type = VT.getTypeForEVT(*DAG.getContext());
@@ -984,7 +985,8 @@ SDValue AMDGPUTargetLowering::LowerINTRI
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
}
- case Intrinsic::AMDGPU_ldexp:
+ case Intrinsic::amdgcn_ldexp:
+ case Intrinsic::AMDGPU_ldexp: // Legacy name
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -1039,7 +1041,7 @@ SDValue AMDGPUTargetLowering::LowerINTRI
Op.getOperand(1),
Op.getOperand(2));
- case Intrinsic::AMDGPU_class:
+ case Intrinsic::amdgcn_class:
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
Op.getOperand(1), Op.getOperand(2));
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td Fri Jan 22 15:30:34 2016
@@ -61,10 +61,17 @@ let TargetPrefix = "AMDGPU", isTarget =
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ // Deprecated in favor of llvm.bitreverse
+ def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ // Deprecated in favor of llvm.amdgcn.s.barrier
def int_AMDGPU_barrier_local : Intrinsic<[], [], [IntrConvergent]>;
- def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>;
+ def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>;
+
+ // Deprecated in favor of llvm.amdgcn.read.workdim
+ def int_AMDGPU_read_workdim : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
}
// Legacy names for compatibility.
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Fri Jan 22 15:30:34 2016
@@ -781,7 +781,8 @@ SDValue R600TargetLowering::LowerOperati
case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
- case Intrinsic::AMDGPU_read_workdim: {
+ case Intrinsic::r600_read_workdim:
+ case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
}
@@ -804,7 +805,12 @@ SDValue R600TargetLowering::LowerOperati
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
- case Intrinsic::AMDGPU_rsq:
+
+ // FIXME: Should be renamed to r600 prefix
+ case Intrinsic::AMDGPU_rsq_clamped:
+ return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::r600_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jan 22 15:30:34 2016
@@ -1312,7 +1312,8 @@ SDValue SITargetLowering::LowerINTRINSIC
case Intrinsic::r600_read_local_size_z:
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
- case Intrinsic::AMDGPU_read_workdim:
+ case Intrinsic::amdgcn_read_workdim:
+ case AMDGPUIntrinsic::AMDGPU_read_workdim: // Legacy name.
// Really only 2 bits.
return lowerImplicitZextParam(DAG, Op, MVT::i8,
getImplicitParameterOffset(MFI, GRID_DIM));
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Fri Jan 22 15:30:34 2016
@@ -1614,7 +1614,7 @@ Instruction *InstCombiner::visitCallInst
break;
}
- case Intrinsic::AMDGPU_rcp: {
+ case Intrinsic::amdgcn_rcp: {
if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
const APFloat &ArgVal = C->getValueAPF();
APFloat Val(ArgVal.getSemantics(), 1.0);
Modified: llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/big_alu.ll Fri Jan 22 15:30:34 2016
@@ -100,7 +100,7 @@ IF137:
%88 = insertelement <4 x float> %87, float %32, i32 2
%89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
%90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89)
- %91 = call float @llvm.AMDGPU.rsq.f32(float %90)
+ %91 = call float @llvm.AMDGPU.rsq.clamped.f32(float %90)
%92 = fmul float %30, %91
%93 = fmul float %31, %91
%94 = fmul float %32, %91
@@ -343,7 +343,7 @@ ENDIF136:
%325 = insertelement <4 x float> %324, float %318, i32 2
%326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3
%327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326)
- %328 = call float @llvm.AMDGPU.rsq.f32(float %327)
+ %328 = call float @llvm.AMDGPU.rsq.clamped.f32(float %327)
%329 = fmul float %314, %328
%330 = fmul float %316, %328
%331 = fmul float %318, %328
@@ -376,7 +376,7 @@ ENDIF136:
%358 = insertelement <4 x float> %357, float %45, i32 2
%359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3
%360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359)
- %361 = call float @llvm.AMDGPU.rsq.f32(float %360)
+ %361 = call float @llvm.AMDGPU.rsq.clamped.f32(float %360)
%362 = fmul float %45, %361
%363 = call float @fabs(float %362)
%364 = fmul float %176, 0x3FECCCCCC0000000
@@ -402,7 +402,7 @@ ENDIF136:
%384 = insertelement <4 x float> %383, float %45, i32 2
%385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3
%386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385)
- %387 = call float @llvm.AMDGPU.rsq.f32(float %386)
+ %387 = call float @llvm.AMDGPU.rsq.clamped.f32(float %386)
%388 = fmul float %45, %387
%389 = call float @fabs(float %388)
%390 = fmul float %176, 0x3FF51EB860000000
@@ -1040,7 +1040,7 @@ IF179:
%896 = insertelement <4 x float> %895, float %45, i32 2
%897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3
%898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897)
- %899 = call float @llvm.AMDGPU.rsq.f32(float %898)
+ %899 = call float @llvm.AMDGPU.rsq.clamped.f32(float %898)
%900 = fmul float %45, %899
%901 = call float @fabs(float %900)
%902 = fmul float %176, 0x3FECCCCCC0000000
@@ -1149,7 +1149,7 @@ ENDIF178:
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #1
+declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll (removed)
@@ -1,499 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
-declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
-declare i32 @llvm.r600.read.tidig.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare double @llvm.fabs.f64(double) #1
-
-; SI-LABEL: {{^}}test_class_f32:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_fabs_f32:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
- %a.fabs = call float @llvm.fabs.f32(float %a) #1
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fabs, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_fneg_f32:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
- %a.fneg = fsub float -0.0, %a
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_fneg_fabs_f32:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
- %a.fabs = call float @llvm.fabs.f32(float %a) #1
- %a.fneg.fabs = fsub float -0.0, %a.fabs
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg.fabs, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_1_f32:
-; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_64_f32:
-; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 64{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; Set all 10 bits of mask
-; SI-LABEL: {{^}}test_class_full_mask_f32:
-; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}}
-; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1023) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_9bit_mask_f32:
-; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
-; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}v_test_class_full_mask_f32:
-; SI-DAG: buffer_load_dword [[VA:v[0-9]+]]
-; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
-; SI: v_cmp_class_f32_e32 vcc, [[VA]], [[MASK]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.in
-
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %gep.out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f32:
-; SI-DAG: buffer_load_dword [[VB:v[0-9]+]]
-; SI: v_cmp_class_f32_e32 vcc, 1.0, [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %b = load i32, i32 addrspace(1)* %gep.in
-
- %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %gep.out, align 4
- ret void
-}
-
-; FIXME: Why isn't this using a literal constant operand?
-; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f32:
-; SI-DAG: buffer_load_dword [[VB:v[0-9]+]]
-; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
-; SI: v_cmp_class_f32_e32 vcc, [[VK]], [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %b = load i32, i32 addrspace(1)* %gep.in
-
- %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %gep.out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_f64:
-; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_fabs_f64:
-; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
- %a.fabs = call double @llvm.fabs.f64(double %a) #1
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fabs, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_fneg_f64:
-; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
- %a.fneg = fsub double -0.0, %a
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_fneg_fabs_f64:
-; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
- %a.fabs = call double @llvm.fabs.f64(double %a) #1
- %a.fneg.fabs = fsub double -0.0, %a.fabs
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg.fabs, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_1_f64:
-; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
-; SI: s_endpgm
-define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 1) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_64_f64:
-; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
-; SI: s_endpgm
-define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 64) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; Set all 9 bits of mask
-; SI-LABEL: {{^}}test_class_full_mask_f64:
-; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
-; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]]
-; SI-NOT: vcc
-; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}v_test_class_full_mask_f64:
-; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]]
-; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
-; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]]
-; SI-NOT: vcc
-; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load double, double addrspace(1)* %in
-
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %gep.out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f64:
-; XSI: v_cmp_class_f64_e32 vcc, 1.0,
-; SI: v_cmp_class_f64_e32 vcc,
-; SI: s_endpgm
-define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %b = load i32, i32 addrspace(1)* %gep.in
-
- %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %gep.out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64:
-; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
-; SI: s_endpgm
-define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %b = load i32, i32 addrspace(1)* %gep.in
-
- %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %gep.out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_fold_or_class_f32_0:
-; SI-NOT: v_cmp_class
-; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}}
-; SI-NOT: v_cmp_class
-; SI: s_endpgm
-define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.in
-
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
- %or = or i1 %class0, %class1
-
- %sext = sext i1 %or to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_fold_or3_class_f32_0:
-; SI-NOT: v_cmp_class
-; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
-; SI-NOT: v_cmp_class
-; SI: s_endpgm
-define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.in
-
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %or.0 = or i1 %class0, %class1
- %or.1 = or i1 %or.0, %class2
-
- %sext = sext i1 %or.1 to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_fold_or_all_tests_class_f32_0:
-; SI-NOT: v_cmp_class
-; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}}
-; SI: v_cmp_class_f32_e32 vcc, v{{[0-9]+}}, [[MASK]]{{$}}
-; SI-NOT: v_cmp_class
-; SI: s_endpgm
-define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.in
-
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class3 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
- %class4 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 16) #1
- %class5 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 32) #1
- %class6 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
- %class7 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 128) #1
- %class8 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 256) #1
- %class9 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 512) #1
- %or.0 = or i1 %class0, %class1
- %or.1 = or i1 %or.0, %class2
- %or.2 = or i1 %or.1, %class3
- %or.3 = or i1 %or.2, %class4
- %or.4 = or i1 %or.3, %class5
- %or.5 = or i1 %or.4, %class6
- %or.6 = or i1 %or.5, %class7
- %or.7 = or i1 %or.6, %class8
- %or.8 = or i1 %or.7, %class9
- %sext = sext i1 %or.8 to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_fold_or_class_f32_1:
-; SI-NOT: v_cmp_class
-; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}}
-; SI-NOT: v_cmp_class
-; SI: s_endpgm
-define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.in
-
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
- %or = or i1 %class0, %class1
-
- %sext = sext i1 %or to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_fold_or_class_f32_2:
-; SI-NOT: v_cmp_class
-; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
-; SI-NOT: v_cmp_class
-; SI: s_endpgm
-define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.in
-
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
- %or = or i1 %class0, %class1
-
- %sext = sext i1 %or to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_no_fold_or_class_f32_0:
-; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 4{{$}}
-; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}}
-; SI: s_or_b64
-; SI: s_endpgm
-define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
- %tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load float, float addrspace(1)* %gep.in
-
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
- %or = or i1 %class0, %class1
-
- %sext = sext i1 %or to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_0_f32:
-; SI-NOT: v_cmp_class
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_class_0_f64:
-; SI-NOT: v_cmp_class
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
-define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 0) #1
- %sext = sext i1 %result to i32
- store i32 %sext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll (removed)
@@ -1,31 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
-
-declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
-declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
-
-; GCN-LABEL: {{^}}test_div_fixup_f32:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
-; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
-; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; GCN: buffer_store_dword [[RESULT]],
-; GCN: s_endpgm
-define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fixup_f64:
-; GCN: v_div_fixup_f64
-define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
- %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
- store double %result, double addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll (removed)
@@ -1,178 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=VI %s
-
-; FIXME: Enable for VI.
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
-declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
-
-; GCN-LABEL: {{^}}test_div_fmas_f32:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
-; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
-; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
-; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VB]], [[VA]], [[VC]]
-; GCN: buffer_store_dword [[RESULT]],
-; GCN: s_endpgm
-define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_0:
-; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VB]], [[VC]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_1:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
-; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VA]], [[VC]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_2:
-; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], 1.0
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f64:
-; GCN: v_div_fmas_f64
-define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
- %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
- store double %result, double addrspace(1)* %out, align 8
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
-; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
-; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
- %cmp = icmp eq i32 %i, 0
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_imm_false_cond_to_vcc:
-; SI: s_mov_b64 vcc, 0
-; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_imm_true_cond_to_vcc:
-; SI: s_mov_b64 vcc, -1
-; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_logical_cond_to_vcc:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
-; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-
-; SI-DAG: v_cmp_eq_i32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}}
-; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
-; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
-; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
-; SI: s_endpgm
-define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
- %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
-
- %a = load float, float addrspace(1)* %gep.a
- %b = load float, float addrspace(1)* %gep.b
- %c = load float, float addrspace(1)* %gep.c
-
- %cmp0 = icmp eq i32 %tid, 0
- %cmp1 = icmp ne i32 %d, 0
- %and = and i1 %cmp0, %cmp1
-
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone
- store float %result, float addrspace(1)* %gep.out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
-; SI: v_cmp_eq_i32_e32 vcc, 0, v{{[0-9]+}}
-; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[SAVE]], exec, [[SAVE]]
-
-; SI: buffer_load_dword [[LOAD:v[0-9]+]]
-; SI: v_cmp_ne_i32_e32 vcc, 0, [[LOAD]]
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-
-
-; SI: BB9_2:
-; SI: s_or_b64 exec, exec, [[SAVE]]
-; SI: v_cmp_ne_i32_e32 vcc, 0, v0
-; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind {
-entry:
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
- %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
- %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
-
- %a = load float, float addrspace(1)* %gep.a
- %b = load float, float addrspace(1)* %gep.b
- %c = load float, float addrspace(1)* %gep.c
-
- %cmp0 = icmp eq i32 %tid, 0
- br i1 %cmp0, label %bb, label %exit
-
-bb:
- %val = load i32, i32 addrspace(1)* %dummy
- %cmp1 = icmp ne i32 %val, 0
- br label %exit
-
-exit:
- %cond = phi i1 [false, %entry], [%cmp1, %bb]
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
- store float %result, float addrspace(1)* %gep.out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll (removed)
@@ -1,364 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
-declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
-declare float @llvm.fabs.f32(float) nounwind readnone
-
-; SI-LABEL @test_div_scale_f32_1:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_2:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_1:
-; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-
- %a = load double, double addrspace(1)* %gep.0, align 8
- %b = load double, double addrspace(1)* %gep.1, align 8
-
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_1:
-; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-
- %a = load double, double addrspace(1)* %gep.0, align 8
- %b = load double, double addrspace(1)* %gep.1, align 8
-
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_scalar_num_1:
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
-; SI-DAG: s_load_dword [[A:s[0-9]+]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
-
- %b = load float, float addrspace(1)* %gep, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_scalar_num_2:
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
-; SI-DAG: s_load_dword [[A:s[0-9]+]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
-
- %b = load float, float addrspace(1)* %gep, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_scalar_den_1:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
-; SI-DAG: s_load_dword [[B:s[0-9]+]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
-
- %a = load float, float addrspace(1)* %gep, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_scalar_den_2:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
-; SI-DAG: s_load_dword [[B:s[0-9]+]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
-
- %a = load float, float addrspace(1)* %gep, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_scalar_num_1:
-; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
-; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-
- %b = load double, double addrspace(1)* %gep, align 8
-
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_scalar_num_2:
-; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-
- %b = load double, double addrspace(1)* %gep, align 8
-
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_scalar_den_1:
-; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-
- %a = load double, double addrspace(1)* %gep, align 8
-
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_scalar_den_2:
-; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-
- %a = load double, double addrspace(1)* %gep, align 8
-
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_all_scalar_1:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_all_scalar_2:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_all_scalar_1:
-; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
-; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f64_all_scalar_2:
-; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
-; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
-; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
-; SI: buffer_store_dwordx2 [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
- %result0 = extractvalue { double, i1 } %result, 0
- store double %result0, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_inline_imm_num:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %a = load float, float addrspace(1)* %gep.0, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_inline_imm_den:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %a = load float, float addrspace(1)* %gep.0, align 4
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_fabs_num:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL @test_div_scale_f32_fabs_den:
-; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
-; SI: buffer_store_dword [[RESULT0]]
-; SI: s_endpgm
-define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-
- %a = load float, float addrspace(1)* %gep.0, align 4
- %b = load float, float addrspace(1)* %gep.1, align 4
-
- %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
-
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
- %result0 = extractvalue { float, i1 } %result, 0
- store float %result0, float addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone
-declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
-
-; SI-LABEL: {{^}}test_ldexp_f32:
-; SI: v_ldexp_f32
-; SI: s_endpgm
-define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
- %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
- store float %result, float addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: {{^}}test_ldexp_f64:
-; SI: v_ldexp_f64
-; SI: s_endpgm
-define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
- %result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone
- store double %result, double addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll (removed)
@@ -1,33 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
-declare double @llvm.sqrt.f64(double) nounwind readnone
-
-; FUNC-LABEL: {{^}}rcp_f64:
-; SI: v_rcp_f64_e32
-define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
- %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}rcp_pat_f64:
-; SI: v_rcp_f64_e32
-define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
- %rcp = fdiv double 1.0, %src
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
-; SI-UNSAFE: v_rsq_f64_e32
-; SI-SAFE-NOT: v_rsq_f64_e32
-; SI-SAFE: v_sqrt_f64
-; SI-SAFE: v_rcp_f64
-define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
- %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
- %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll (removed)
@@ -1,50 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
-
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
-
-declare float @llvm.sqrt.f32(float) nounwind readnone
-
-; FUNC-LABEL: {{^}}rcp_f32:
-; SI: v_rcp_f32_e32
-; EG: RECIP_IEEE
-define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
- %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
- store float %rcp, float addrspace(1)* %out, align 4
- ret void
-}
-
-; FIXME: Evergreen only ever does unsafe fp math.
-; FUNC-LABEL: {{^}}rcp_pat_f32:
-
-; SI-SAFE: v_rcp_f32_e32
-; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
-
-; EG: RECIP_IEEE
-
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
- %rcp = fdiv float 1.0, %src
- store float %rcp, float addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}rsq_rcp_pat_f32:
-; SI-UNSAFE: v_rsq_f32_e32
-; SI-SAFE: v_sqrt_f32_e32
-; SI-SAFE: v_rcp_f32_e32
-
-; EG: RECIPSQRT_IEEE
-define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
- %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
- %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
- store float %rcp, float addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll (removed)
@@ -1,37 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}read_workdim:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV * [[VAL]], KC0[2].Z
-
-; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
-; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-define void @read_workdim(i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.AMDGPU.read.workdim() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}read_workdim_known_bits:
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
-; GCN-NOT: 0xff
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
-define void @read_workdim_known_bits(i32 addrspace(1)* %out) {
-entry:
- %dim = call i32 @llvm.AMDGPU.read.workdim() #0
- %shl = shl i32 %dim, 24
- %shr = lshr i32 %shl, 24
- store i32 %shr, i32 addrspace(1)* %out
- ret void
-}
-
-declare i32 @llvm.AMDGPU.read.workdim() #0
-
-attributes #0 = { readnone }
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll (removed)
@@ -1,23 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
-
-declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
-
-; FUNC-LABEL: {{^}}rsq_clamped_f64:
-; SI: v_rsq_clamp_f64_e32
-
-; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
-; TODO: this constant should be folded:
-; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
-; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
-; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
-; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
-; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
-; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
-; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
-
-define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
- %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
- store double %rsq_clamped, double addrspace(1)* %out, align 8
- ret void
-}
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll Fri Jan 22 15:30:34 2016
@@ -2,6 +2,8 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: Uses of this should be moved to llvm.amdgcn.rsq.clamped, and
+; an r600 variant added.
declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll (removed)
@@ -1,33 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
-
-; FUNC-LABEL: {{^}}rsq_f32:
-; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-; EG: RECIPSQRT_IEEE
-define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
- %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
- store float %rsq, float addrspace(1)* %out, align 4
- ret void
-}
-
-; TODO: Really these should be constant folded
-; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
-; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
-; EG: RECIPSQRT_IEEE
-define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
- %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
- store float %rsq, float addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
-; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
-; EG: RECIPSQRT_IEEE
-define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
- %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
- store float %rsq, float addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll (removed)
@@ -1,30 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
-
-; SI-LABEL: {{^}}test_trig_preop_f64:
-; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]]
-; SI-DAG: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
-define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load double, double addrspace(1)* %aptr, align 8
- %b = load i32, i32 addrspace(1)* %bptr, align 4
- %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
- store double %result, double addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: {{^}}test_trig_preop_f64_imm_segment:
-; SI: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
-define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
- %a = load double, double addrspace(1)* %aptr, align 8
- %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
- store double %result, double addrspace(1)* %out, align 8
- ret void
-}
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll Fri Jan 22 15:30:34 2016
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
-declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
+declare i1 @llvm.amdgcn.class.f32(float, i32) #1
+declare i1 @llvm.amdgcn.class.f64(double, i32) #1
declare i32 @llvm.r600.read.tidig.x() #1
declare float @llvm.fabs.f32(float) #1
declare double @llvm.fabs.f64(double) #1
@@ -15,7 +15,7 @@ declare double @llvm.fabs.f64(double) #1
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -31,7 +31,7 @@ define void @test_class_f32(i32 addrspac
; SI: s_endpgm
define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
%a.fabs = call float @llvm.fabs.f32(float %a) #1
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -47,7 +47,7 @@ define void @test_class_fabs_f32(i32 add
; SI: s_endpgm
define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
%a.fneg = fsub float -0.0, %a
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -64,7 +64,7 @@ define void @test_class_fneg_f32(i32 add
define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
%a.fabs = call float @llvm.fabs.f32(float %a) #1
%a.fneg.fabs = fsub float -0.0, %a.fabs
- %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -77,7 +77,7 @@ define void @test_class_fneg_fabs_f32(i3
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -90,7 +90,7 @@ define void @test_class_1_f32(i32 addrsp
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -105,7 +105,7 @@ define void @test_class_64_f32(i32 addrs
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1023) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -119,7 +119,7 @@ define void @test_class_full_mask_f32(i3
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -138,7 +138,7 @@ define void @v_test_class_full_mask_f32(
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -156,7 +156,7 @@ define void @test_class_inline_imm_const
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float 1.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -176,7 +176,7 @@ define void @test_class_lit_constant_dyn
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -191,7 +191,7 @@ define void @test_class_lit_constant_dyn
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -207,7 +207,7 @@ define void @test_class_f64(i32 addrspac
; SI: s_endpgm
define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
%a.fabs = call double @llvm.fabs.f64(double %a) #1
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -223,7 +223,7 @@ define void @test_class_fabs_f64(i32 add
; SI: s_endpgm
define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
%a.fneg = fsub double -0.0, %a
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -240,7 +240,7 @@ define void @test_class_fneg_f64(i32 add
define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
%a.fabs = call double @llvm.fabs.f64(double %a) #1
%a.fneg.fabs = fsub double -0.0, %a.fabs
- %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -250,7 +250,7 @@ define void @test_class_fneg_fabs_f64(i3
; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
; SI: s_endpgm
define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 1) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -260,7 +260,7 @@ define void @test_class_1_f64(i32 addrsp
; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
; SI: s_endpgm
define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 64) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -276,7 +276,7 @@ define void @test_class_64_f64(i32 addrs
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -296,7 +296,7 @@ define void @v_test_class_full_mask_f64(
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load double, double addrspace(1)* %in
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -312,7 +312,7 @@ define void @test_class_inline_imm_const
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -327,7 +327,7 @@ define void @test_class_lit_constant_dyn
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.in
- %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %gep.out, align 4
ret void
@@ -344,8 +344,8 @@ define void @test_fold_or_class_f32_0(i3
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 3) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -364,9 +364,9 @@ define void @test_fold_or3_class_f32_0(i
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
+ %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
%or.0 = or i1 %class0, %class1
%or.1 = or i1 %or.0, %class2
@@ -387,16 +387,16 @@ define void @test_fold_or_all_tests_clas
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class3 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
- %class4 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 16) #1
- %class5 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 32) #1
- %class6 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
- %class7 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 128) #1
- %class8 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 256) #1
- %class9 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 512) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
+ %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
+ %class3 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1
+ %class4 = call i1 @llvm.amdgcn.class.f32(float %a, i32 16) #1
+ %class5 = call i1 @llvm.amdgcn.class.f32(float %a, i32 32) #1
+ %class6 = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
+ %class7 = call i1 @llvm.amdgcn.class.f32(float %a, i32 128) #1
+ %class8 = call i1 @llvm.amdgcn.class.f32(float %a, i32 256) #1
+ %class9 = call i1 @llvm.amdgcn.class.f32(float %a, i32 512) #1
%or.0 = or i1 %class0, %class1
%or.1 = or i1 %or.0, %class2
%or.2 = or i1 %or.1, %class3
@@ -422,8 +422,8 @@ define void @test_fold_or_class_f32_1(i3
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -442,8 +442,8 @@ define void @test_fold_or_class_f32_2(i3
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -462,8 +462,8 @@ define void @test_no_fold_or_class_f32_0
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load float, float addrspace(1)* %gep.in
- %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %b, i32 8) #1
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -477,7 +477,7 @@ define void @test_no_fold_or_class_f32_0
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
@@ -489,7 +489,7 @@ define void @test_class_0_f32(i32 addrsp
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
- %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 0) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll Fri Jan 22 15:30:34 2016
@@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
-declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
-declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
+declare float @llvm.amdgcn.div.fixup.f32(float, float, float) nounwind readnone
+declare double @llvm.amdgcn.div.fixup.f64(double, double, double) nounwind readnone
; GCN-LABEL: {{^}}test_div_fixup_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -17,7 +17,7 @@ declare double @llvm.AMDGPU.div.fixup.f6
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -25,7 +25,7 @@ define void @test_div_fixup_f32(float ad
; GCN-LABEL: {{^}}test_div_fixup_f64:
; GCN: v_div_fixup_f64
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
- %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
+ %result = call double @llvm.amdgcn.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll Fri Jan 22 15:30:34 2016
@@ -4,8 +4,8 @@
; FIXME: Enable for VI.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
-declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
+declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone
+declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone
; GCN-LABEL: {{^}}test_div_fmas_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -21,7 +21,7 @@ declare double @llvm.AMDGPU.div.fmas.f64
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -35,7 +35,7 @@ define void @test_div_fmas_f32(float add
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -49,7 +49,7 @@ define void @test_div_fmas_f32_inline_im
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -63,7 +63,7 @@ define void @test_div_fmas_f32_inline_im
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -71,7 +71,7 @@ define void @test_div_fmas_f32_inline_im
; GCN-LABEL: {{^}}test_div_fmas_f64:
; GCN: v_div_fmas_f64
define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
- %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
+ %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -81,7 +81,7 @@ define void @test_div_fmas_f64(double ad
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
%cmp = icmp eq i32 %i, 0
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -90,7 +90,7 @@ define void @test_div_fmas_f32_cond_to_v
; SI: s_mov_b64 vcc, 0
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -99,7 +99,7 @@ define void @test_div_fmas_f32_imm_false
; SI: s_mov_b64 vcc, -1
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -129,7 +129,7 @@ define void @test_div_fmas_f32_logical_c
%cmp1 = icmp ne i32 %d, 0
%and = and i1 %cmp0, %cmp1
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone
store float %result, float addrspace(1)* %gep.out, align 4
ret void
}
@@ -172,7 +172,7 @@ bb:
exit:
%cond = phi i1 [false, %entry], [%cmp1, %bb]
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
+ %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
store float %result, float addrspace(1)* %gep.out, align 4
ret void
}
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll Fri Jan 22 15:30:34 2016
@@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
-declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone
+declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
; SI-LABEL @test_div_scale_f32_1:
@@ -19,7 +19,7 @@ define void @test_div_scale_f32_1(float
%a = load float, float addrspace(1)* %gep.0, align 4
%b = load float, float addrspace(1)* %gep.1, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -39,7 +39,7 @@ define void @test_div_scale_f32_2(float
%a = load float, float addrspace(1)* %gep.0, align 4
%b = load float, float addrspace(1)* %gep.1, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -59,7 +59,7 @@ define void @test_div_scale_f64_1(double
%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -79,7 +79,7 @@ define void @test_div_scale_f64_2(double
%a = load double, double addrspace(1)* %gep.0, align 8
%b = load double, double addrspace(1)* %gep.1, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -97,7 +97,7 @@ define void @test_div_scale_f32_scalar_n
%b = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -115,7 +115,7 @@ define void @test_div_scale_f32_scalar_n
%b = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -133,7 +133,7 @@ define void @test_div_scale_f32_scalar_d
%a = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -151,7 +151,7 @@ define void @test_div_scale_f32_scalar_d
%a = load float, float addrspace(1)* %gep, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -169,7 +169,7 @@ define void @test_div_scale_f64_scalar_n
%b = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -187,7 +187,7 @@ define void @test_div_scale_f64_scalar_n
%b = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -205,7 +205,7 @@ define void @test_div_scale_f64_scalar_d
%a = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -223,7 +223,7 @@ define void @test_div_scale_f64_scalar_d
%a = load double, double addrspace(1)* %gep, align 8
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -237,7 +237,7 @@ define void @test_div_scale_f64_scalar_d
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -251,7 +251,7 @@ define void @test_div_scale_f32_all_scal
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -266,7 +266,7 @@ define void @test_div_scale_f32_all_scal
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -281,7 +281,7 @@ define void @test_div_scale_f64_all_scal
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
- %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
ret void
@@ -297,7 +297,7 @@ define void @test_div_scale_f32_inline_i
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%a = load float, float addrspace(1)* %gep.0, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -313,7 +313,7 @@ define void @test_div_scale_f32_inline_i
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%a = load float, float addrspace(1)* %gep.0, align 4
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -335,7 +335,7 @@ define void @test_div_scale_f32_fabs_num
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
@@ -357,7 +357,7 @@ define void @test_div_scale_f32_fabs_den
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
- %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
ret void
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll Fri Jan 22 15:30:34 2016
@@ -1,6 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+declare float @llvm.amdgcn.ldexp.f32(float, i32) nounwind readnone
+declare double @llvm.amdgcn.ldexp.f64(double, i32) nounwind readnone
+
declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone
declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
@@ -8,7 +11,7 @@ declare double @llvm.AMDGPU.ldexp.f64(do
; SI: v_ldexp_f32
; SI: s_endpgm
define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
- %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
+ %result = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
@@ -17,6 +20,24 @@ define void @test_ldexp_f32(float addrsp
; SI: v_ldexp_f64
; SI: s_endpgm
define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
+ %result = call double @llvm.amdgcn.ldexp.f64(double %a, i32 %b) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: {{^}}test_legacy_ldexp_f32:
+; SI: v_ldexp_f32
+; SI: s_endpgm
+define void @test_legacy_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
+ %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_legacy_ldexp_f64:
+; SI: v_ldexp_f64
+; SI: s_endpgm
+define void @test_legacy_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
%result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll Fri Jan 22 15:30:34 2016
@@ -5,32 +5,26 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+declare float @llvm.amdgcn.rcp.f32(float) #0
+declare double @llvm.amdgcn.rcp.f64(double) #0
-declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+declare double @llvm.sqrt.f64(double) #0
+declare float @llvm.sqrt.f32(float) #0
-declare float @llvm.sqrt.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rcp_f32:
; SI: v_rcp_f32_e32
-; EG: RECIP_IEEE
-define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
- %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
+define void @rcp_f32(float addrspace(1)* %out, float %src) #1 {
+ %rcp = call float @llvm.amdgcn.rcp.f32(float %src) #0
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
-; FIXME: Evergreen only ever does unsafe fp math.
; FUNC-LABEL: {{^}}rcp_pat_f32:
; SI-SAFE: v_rcp_f32_e32
; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
-
-; EG: RECIP_IEEE
-
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
%rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
@@ -40,11 +34,40 @@ define void @rcp_pat_f32(float addrspace
; SI-UNSAFE: v_rsq_f32_e32
; SI-SAFE: v_sqrt_f32_e32
; SI-SAFE: v_rcp_f32_e32
-
-; EG: RECIPSQRT_IEEE
-define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
- %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
- %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
+define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
+ %sqrt = call float @llvm.sqrt.f32(float %src) #0
+ %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) #0
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}rcp_f64:
+; SI: v_rcp_f64_e32
+define void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %src) #0
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rcp_pat_f64:
+; SI: v_rcp_f64_e32
+define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+ %rcp = fdiv double 1.0, %src
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE-NOT: v_rsq_f64_e32
+; SI-SAFE: v_sqrt_f64
+; SI-SAFE: v_rcp_f64
+define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+ %sqrt = call double @llvm.sqrt.f64(double %src) #0
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) #0
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll Fri Jan 22 15:30:34 2016
@@ -1,23 +1,19 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}read_workdim:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV * [[VAL]], KC0[2].Z
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA %s
+; GCN-LABEL: {{^}}read_workdim:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @read_workdim(i32 addrspace(1)* %out) {
entry:
- %0 = call i32 @llvm.AMDGPU.read.workdim() #0
+ %0 = call i32 @llvm.amdgcn.read.workdim() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}read_workdim_known_bits:
+; GCN-LABEL: {{^}}read_workdim_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
; GCN-NOT: 0xff
@@ -25,13 +21,26 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define void @read_workdim_known_bits(i32 addrspace(1)* %out) {
entry:
- %dim = call i32 @llvm.AMDGPU.read.workdim() #0
+ %dim = call i32 @llvm.amdgcn.read.workdim() #0
%shl = shl i32 %dim, 24
%shr = lshr i32 %shl, 24
store i32 %shr, i32 addrspace(1)* %out
ret void
}
+; GCN-LABEL: {{^}}legacy_read_workdim:
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+define void @legacy_read_workdim(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %dim, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.read.workdim() #0
declare i32 @llvm.AMDGPU.read.workdim() #0
-attributes #0 = { readnone }
+attributes #0 = { nounwind readnone }
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll Fri Jan 22 15:30:34 2016
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
-declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
+declare double @llvm.amdgcn.rsq.clamped.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f64:
; SI: v_rsq_clamp_f64_e32
@@ -17,7 +17,7 @@ declare double @llvm.AMDGPU.rsq.clamped.
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
- %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
+ %rsq_clamped = call double @llvm.amdgcn.rsq.clamped.f64(double %src) nounwind readnone
store double %rsq_clamped, double addrspace(1)* %out, align 8
ret void
}
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll?rev=258557&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll Fri Jan 22 15:30:34 2016
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.amdgcn.rsq.f32(float) #0
+declare double @llvm.amdgcn.rsq.f64(double) #0
+
+; FUNC-LABEL: {{^}}rsq_f32:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define void @rsq_f32(float addrspace(1)* %out, float %src) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float %src) #0
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float 4.0) #0
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float 100.0) #0
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_f64:
+; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @rsq_f64(double addrspace(1)* %out, double %src) #1 {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double %src) #0
+ store double %rsq, double addrspace(1)* %out, align 4
+ ret void
+}
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: {{^}}rsq_f64_constant_4.0
+; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 4.0
+define void @rsq_f64_constant_4.0(double addrspace(1)* %out) #1 {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double 4.0) #0
+ store double %rsq, double addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_f64_constant_100.0
+; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0x40590000
+; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0{{$}}
+; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @rsq_f64_constant_100.0(double addrspace(1)* %out) #1 {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) #0
+ store double %rsq, double addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll (from r258544, llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll Fri Jan 22 15:30:34 2016
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
+declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone
; SI-LABEL: {{^}}test_trig_preop_f64:
; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]]
@@ -12,7 +12,7 @@ declare double @llvm.AMDGPU.trig.preop.f
define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load double, double addrspace(1)* %aptr, align 8
%b = load i32, i32 addrspace(1)* %bptr, align 4
- %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -24,7 +24,7 @@ define void @test_trig_preop_f64(double
; SI: s_endpgm
define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
%a = load double, double addrspace(1)* %aptr, align 8
- %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll?rev=258557&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll Fri Jan 22 15:30:34 2016
@@ -0,0 +1,36 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s
+
+; EG-LABEL: {{^}}read_workdim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].Z
+define void @read_workdim(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.r600.read.workdim() #0
+ store i32 %dim, i32 addrspace(1)* %out
+ ret void
+}
+
+; EG-LABEL: {{^}}read_workdim_known_bits:
+define void @read_workdim_known_bits(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.r600.read.workdim() #0
+ %shl = shl i32 %dim, 24
+ %shr = lshr i32 %shl, 24
+ store i32 %shr, i32 addrspace(1)* %out
+ ret void
+}
+
+; EG-LABEL: {{^}}legacy_read_workdim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].Z
+define void @legacy_read_workdim(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %dim, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.workdim() #0
+declare i32 @llvm.AMDGPU.read.workdim() #0
+
+attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/pv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pv.ll?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pv.ll Fri Jan 22 15:30:34 2016
@@ -103,7 +103,7 @@ main_body:
%95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
%96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
%97 = call float @fabs(float %96)
- %98 = call float @llvm.AMDGPU.rsq.f32(float %97)
+ %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97)
%99 = fmul float %4, %98
%100 = fmul float %5, %98
%101 = fmul float %6, %98
@@ -119,10 +119,10 @@ main_body:
%111 = extractelement <4 x float> %110, i32 2
%112 = fmul float %111, %10
%113 = fadd float %112, %22
- %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00)
- %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
- %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
- %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+ %114 = call float @llvm.AMDGPU.clamp.f32(float %105, float 0.000000e+00, float 1.000000e+00)
+ %115 = call float @llvm.AMDGPU.clamp.f32(float %109, float 0.000000e+00, float 1.000000e+00)
+ %116 = call float @llvm.AMDGPU.clamp.f32(float %113, float 0.000000e+00, float 1.000000e+00)
+ %117 = call float @llvm.AMDGPU.clamp.f32(float %15, float 0.000000e+00, float 1.000000e+00)
%118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%119 = extractelement <4 x float> %118, i32 0
%120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
@@ -202,9 +202,9 @@ main_body:
%194 = fadd float %193, %188
%195 = fmul float %181, %174
%196 = fadd float %195, %190
- %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00)
- %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00)
- %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00)
+ %197 = call float @llvm.AMDGPU.clamp.f32(float %192, float 0.000000e+00, float 1.000000e+00)
+ %198 = call float @llvm.AMDGPU.clamp.f32(float %194, float 0.000000e+00, float 1.000000e+00)
+ %199 = call float @llvm.AMDGPU.clamp.f32(float %196, float 0.000000e+00, float 1.000000e+00)
%200 = insertelement <4 x float> undef, float %75, i32 0
%201 = insertelement <4 x float> %200, float %79, i32 1
%202 = insertelement <4 x float> %201, float %83, i32 2
@@ -225,10 +225,10 @@ declare float @llvm.AMDGPU.dp4(<4 x floa
declare float @fabs(float) #2
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #1
+declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #1
+declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1
; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3
Added: llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll?rev=258557&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/rcp-pattern.ll Fri Jan 22 15:30:34 2016
@@ -0,0 +1,11 @@
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: Evergreen only ever does unsafe fp math.
+; FUNC-LABEL: {{^}}rcp_pat_f32:
+; EG: RECIP_IEEE
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+ %rcp = fdiv float 1.0, %src
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Fri Jan 22 15:30:34 2016
@@ -71,7 +71,7 @@ main_body:
%55 = fadd float %54, %53
%56 = fmul float %45, %45
%57 = fadd float %55, %56
- %58 = call float @llvm.AMDGPU.rsq.f32(float %57)
+ %58 = call float @llvm.amdgcn.rsq.f32(float %57)
%59 = fmul float %43, %58
%60 = fmul float %44, %58
%61 = fmul float %45, %58
@@ -213,7 +213,7 @@ declare float @llvm.SI.fs.interp(i32, i3
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #3
+declare float @llvm.amdgcn.rsq.f32(float) #3
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #3
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=258557&r1=258556&r2=258557&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Fri Jan 22 15:30:34 2016
@@ -215,7 +215,7 @@ main_body:
%198 = fadd float %197, %196
%199 = fmul float %97, %97
%200 = fadd float %198, %199
- %201 = call float @llvm.AMDGPU.rsq.f32(float %200)
+ %201 = call float @llvm.amdgcn.rsq.f32(float %200)
%202 = fmul float %95, %201
%203 = fmul float %96, %201
%204 = fmul float %202, %29
@@ -396,7 +396,7 @@ IF67:
%355 = fadd float %354, %353
%356 = fmul float %352, %352
%357 = fadd float %355, %356
- %358 = call float @llvm.AMDGPU.rsq.f32(float %357)
+ %358 = call float @llvm.amdgcn.rsq.f32(float %357)
%359 = fmul float %350, %358
%360 = fmul float %351, %358
%361 = fmul float %352, %358
@@ -524,7 +524,7 @@ IF67:
%483 = fadd float %482, %481
%484 = fmul float %109, %109
%485 = fadd float %483, %484
- %486 = call float @llvm.AMDGPU.rsq.f32(float %485)
+ %486 = call float @llvm.amdgcn.rsq.f32(float %485)
%487 = fmul float %107, %486
%488 = fmul float %108, %486
%489 = fmul float %109, %486
@@ -553,7 +553,7 @@ IF67:
%512 = fadd float %511, %510
%513 = fmul float %97, %97
%514 = fadd float %512, %513
- %515 = call float @llvm.AMDGPU.rsq.f32(float %514)
+ %515 = call float @llvm.amdgcn.rsq.f32(float %514)
%516 = fmul float %95, %515
%517 = fmul float %96, %515
%518 = fmul float %97, %515
@@ -670,7 +670,7 @@ declare i32 @llvm.SI.tid() #2
declare float @ceil(float) #3
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq.f32(float) #2
+declare float @llvm.amdgcn.rsq.f32(float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
@@ -899,7 +899,7 @@ main_body:
%212 = fadd float %211, %210
%213 = fmul float %209, %209
%214 = fadd float %212, %213
- %215 = call float @llvm.AMDGPU.rsq.f32(float %214)
+ %215 = call float @llvm.amdgcn.rsq.f32(float %214)
%216 = fmul float %205, %215
%217 = fmul float %207, %215
%218 = fmul float %209, %215
@@ -1135,7 +1135,7 @@ IF189:
%434 = fsub float -0.000000e+00, %433
%435 = fadd float 0x3FF00068E0000000, %434
%436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00)
- %437 = call float @llvm.AMDGPU.rsq.f32(float %436)
+ %437 = call float @llvm.amdgcn.rsq.f32(float %436)
%438 = fmul float %437, %436
%439 = fsub float -0.000000e+00, %436
%440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00)
@@ -1159,7 +1159,7 @@ IF189:
%458 = fadd float %457, %456
%459 = fmul float %455, %455
%460 = fadd float %458, %459
- %461 = call float @llvm.AMDGPU.rsq.f32(float %460)
+ %461 = call float @llvm.amdgcn.rsq.f32(float %460)
%462 = fmul float %451, %461
%463 = fmul float %453, %461
%464 = fmul float %455, %461
@@ -1269,7 +1269,7 @@ ENDIF197:
%559 = fadd float %558, %557
%560 = fmul float %556, %556
%561 = fadd float %559, %560
- %562 = call float @llvm.AMDGPU.rsq.f32(float %561)
+ %562 = call float @llvm.amdgcn.rsq.f32(float %561)
%563 = fmul float %562, %561
%564 = fsub float -0.000000e+00, %561
%565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00)
Copied: llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll (from r258544, llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll?p2=llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll&p1=llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll&r1=258544&r2=258557&rev=258557&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll Fri Jan 22 15:30:34 2016
@@ -1,47 +1,47 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
-declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
; CHECK-LABEL: @test_constant_fold_rcp_f32_1
; CHECK-NEXT: ret float 1.000000e+00
define float @test_constant_fold_rcp_f32_1() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
+ %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
ret float %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f64_1
; CHECK-NEXT: ret double 1.000000e+00
define double @test_constant_fold_rcp_f64_1() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
+ %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
ret double %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f32_half
; CHECK-NEXT: ret float 2.000000e+00
define float @test_constant_fold_rcp_f32_half() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
+ %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
ret float %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f64_half
; CHECK-NEXT: ret double 2.000000e+00
define double @test_constant_fold_rcp_f64_half() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
+ %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
ret double %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f32_43
-; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
+; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01)
define float @test_constant_fold_rcp_f32_43() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
+ %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
ret float %val
}
; CHECK-LABEL: @test_constant_fold_rcp_f64_43
-; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
+; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01)
define double @test_constant_fold_rcp_f64_43() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
+ %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
ret double %val
}
Removed: llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll?rev=258556&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/r600-intrinsics.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-
-declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_1
-; CHECK-NEXT: ret float 1.000000e+00
-define float @test_constant_fold_rcp_f32_1() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
- ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_1
-; CHECK-NEXT: ret double 1.000000e+00
-define double @test_constant_fold_rcp_f64_1() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
- ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_half
-; CHECK-NEXT: ret float 2.000000e+00
-define float @test_constant_fold_rcp_f32_half() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
- ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_half
-; CHECK-NEXT: ret double 2.000000e+00
-define double @test_constant_fold_rcp_f64_half() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
- ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_43
-; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
-define float @test_constant_fold_rcp_f32_43() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
- ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_43
-; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
-define double @test_constant_fold_rcp_f64_43() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
- ret double %val
-}
-
More information about the llvm-commits
mailing list