[llvm] r297650 - AMDGPU: Fold icmp/fcmp into icmp intrinsic
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 31 03:03:37 PDT 2017
Hi Matt,
this commit causes the attached shader to miscompile. Specifically:
opt -S -instcombine < $shader
causes the logical and in the endif8 block to disappear and one of the
branch conditions to become constant.
I haven't looked in more detail yet, just found this via a bisect, and
reverting the commit fixes the issue.
Thanks,
Nicolai
On 13.03.2017 19:14, Matt Arsenault via llvm-commits wrote:
> Author: arsenm
> Date: Mon Mar 13 13:14:02 2017
> New Revision: 297650
>
> URL: http://llvm.org/viewvc/llvm-project?rev=297650&view=rev
> Log:
> AMDGPU: Fold icmp/fcmp into icmp intrinsic
>
> The typical use is a library vote function which
> compares to 0. Fold the user condition into the intrinsic.
>
> Modified:
> llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
> llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll
>
> Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=297650&r1=297649&r2=297650&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Mar 13 13:14:02 2017
> @@ -3365,6 +3365,93 @@ Instruction *InstCombiner::visitCallInst
>
> break;
> }
> + case Intrinsic::amdgcn_icmp:
> + case Intrinsic::amdgcn_fcmp: {
> + const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
> + if (!CC)
> + break;
> +
> + // Guard against invalid arguments.
> + int64_t CCVal = CC->getZExtValue();
> + bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
> + if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
> + CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
> + (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
> + CCVal > CmpInst::LAST_FCMP_PREDICATE)))
> + break;
> +
> + Value *Src0 = II->getArgOperand(0);
> + Value *Src1 = II->getArgOperand(1);
> +
> + if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
> + if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
> + Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
> + return replaceInstUsesWith(*II,
> + ConstantExpr::getSExt(CCmp, II->getType()));
> + }
> +
> + // Canonicalize constants to RHS.
> + CmpInst::Predicate SwapPred
> + = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
> + II->setArgOperand(0, Src1);
> + II->setArgOperand(1, Src0);
> + II->setArgOperand(2, ConstantInt::get(CC->getType(),
> + static_cast<int>(SwapPred)));
> + return II;
> + }
> +
> + if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
> + break;
> +
> + // Canonicalize compare eq with true value to compare != 0
> + // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
> + // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
> + // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
> + // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
> + Value *ExtSrc;
> + if (CCVal == CmpInst::ICMP_EQ &&
> + ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
> + (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
> + ExtSrc->getType()->isIntegerTy(1)) {
> + II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
> + II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
> + return II;
> + }
> +
> + CmpInst::Predicate SrcPred;
> + Value *SrcLHS;
> + Value *SrcRHS;
> +
> + // Fold compare eq/ne with 0 from a compare result as the predicate to the
> + // intrinsic. The typical use is a wave vote function in the library, which
> + // will be fed from a user code condition compared with 0. Fold in the
> + // redundant compare.
> +
> + // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
> + // -> llvm.amdgcn.[if]cmp(a, b, pred)
> + //
> + // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
> + // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
> + if (match(Src1, m_Zero()) &&
> + match(Src0,
> + m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
> + if (CCVal == CmpInst::ICMP_EQ)
> + SrcPred = CmpInst::getInversePredicate(SrcPred);
> +
> + Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
> + Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
> +
> + Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
> + SrcLHS->getType());
> + Value *Args[] = { SrcLHS, SrcRHS,
> + ConstantInt::get(CC->getType(), SrcPred) };
> + CallInst *NewCall = Builder->CreateCall(NewF, Args);
> + NewCall->takeName(II);
> + return replaceInstUsesWith(*II, NewCall);
> + }
> +
> + break;
> + }
> case Intrinsic::stackrestore: {
> // If the save is right next to the restore, remove the restore. This can
> // happen when variable allocas are DCE'd.
>
> Modified: llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll?rev=297650&r1=297649&r2=297650&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll (original)
> +++ llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll Mon Mar 13 13:14:02 2017
> @@ -1207,3 +1207,314 @@ define float @fmed3_qnan0_qnan1_x_f32(fl
> %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
> ret float %med3
> }
> +
> +; --------------------------------------------------------------------
> +; llvm.amdgcn.icmp
> +; --------------------------------------------------------------------
> +
> +declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
> +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
> +
> +; Make sure there's no crash for invalid input
> +; CHECK-LABEL: @invalid_nonconstant_icmp_code(
> +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
> +define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
> + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
> + ret i64 %result
> +}
> +
> +; CHECK-LABEL: @invalid_icmp_code(
> +; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
> +; CHECK: %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
> +define i64 @invalid_icmp_code(i32 %a, i32 %b) {
> + %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
> + %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
> + %or = or i64 %under, %over
> + ret i64 %or
> +}
> +
> +; CHECK-LABEL: @icmp_constant_inputs_false(
> +; CHECK: ret i64 0
> +define i64 @icmp_constant_inputs_false() {
> + %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32)
> + ret i64 %result
> +}
> +
> +; CHECK-LABEL: @icmp_constant_inputs_true(
> +; CHECK: ret i64 -1
> +define i64 @icmp_constant_inputs_true() {
> + %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
> + ret i64 %result
> +}
> +
> +; CHECK-LABEL: @icmp_constant_to_rhs_slt(
> +; CHECK: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 9, i32 38)
> +define i64 @icmp_constant_to_rhs_slt(i32 %x) {
> + %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40)
> + ret i64 %result
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
> +define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
> + %cmp = icmp eq i32 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
> +define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
> + %cmp = icmp ne i32 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 41)
> +define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
> + %cmp = icmp sle i32 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
> +define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
> + %cmp = icmp ugt i64 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
> +define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
> + %cmp = icmp ugt i64 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 1)
> +define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
> + %cmp = fcmp oeq float %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
> +define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
> + %cmp = fcmp une float %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 4)
> +define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
> + %cmp = fcmp olt double %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
> +; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
> +define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
> + %cmp = icmp eq i32 %a, %b
> + %sext.cmp = sext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
> +define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
> + %cmp = icmp eq i32 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
> +define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
> + %cmp = icmp slt i32 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
> +define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
> + %cmp = fcmp oeq float %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 2)
> +define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
> + %cmp = fcmp ule float %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 13)
> +define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
> + %cmp = fcmp ogt float %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
> +define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
> + %cmp = icmp eq i32 %a, %b
> + %zext.cmp = zext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
> +; CHECK: %zext.cond = zext i1 %cond to i32
> +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 0, i32 33)
> +define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
> + %zext.cond = zext i1 %cond to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
> +; CHECK: %zext.cond = zext i1 %cond to i32
> +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
> +define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
> + %zext.cond = zext i1 %cond to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
> +; CHECK: %sext.cond = sext i1 %cond to i32
> +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
> +define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
> + %sext.cond = sext i1 %cond to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
> +; CHECK: %sext.cond = sext i1 %cond to i32
> +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 0, i32 33)
> +define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
> + %sext.cond = sext i1 %cond to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
> +; CHECK: %sext.cond = sext i1 %cond to i64
> +; CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 0, i32 33)
> +define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
> + %sext.cond = sext i1 %cond to i64
> + %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32)
> + ret i64 %mask
> +}
> +
> +; TODO: Should be able to fold to false
> +; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
> +; CHECK: %cmp = icmp eq i32 %a, %b
> +; CHECK: %sext.cmp = sext i1 %cmp to i32
> +; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
> +define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
> + %cmp = icmp eq i32 %a, %b
> + %sext.cmp = sext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
> +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
> +define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
> + %cmp = icmp eq i32 %a, %b
> + %sext.cmp = sext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
> +; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
> +define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
> + %cmp = icmp sge i32 %a, %b
> + %sext.cmp = sext i1 %cmp to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
> + ret i64 %mask
> +}
> +
> +; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
> +; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 38)
> +define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
> + %cmp = icmp sle i32 %a, %b
> + %not = xor i1 %cmp, true
> + %zext.cmp = zext i1 %not to i32
> + %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
> + ret i64 %mask
> +}
> +
> +; --------------------------------------------------------------------
> +; llvm.amdgcn.fcmp
> +; --------------------------------------------------------------------
> +
> +declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
> +
> +; Make sure there's no crash for invalid input
> +; CHECK-LABEL: @invalid_nonconstant_fcmp_code(
> +; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
> +define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
> + %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
> + ret i64 %result
> +}
> +
> +; CHECK-LABEL: @invalid_fcmp_code(
> +; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
> +; CHECK: %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
> +define i64 @invalid_fcmp_code(float %a, float %b) {
> + %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
> + %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
> + %or = or i64 %under, %over
> + ret i64 %or
> +}
> +
> +; CHECK-LABEL: @fcmp_constant_inputs_false(
> +; CHECK: ret i64 0
> +define i64 @fcmp_constant_inputs_false() {
> + %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1)
> + ret i64 %result
> +}
> +
> +; CHECK-LABEL: @fcmp_constant_inputs_true(
> +; CHECK: ret i64 -1
> +define i64 @fcmp_constant_inputs_true() {
> + %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
> + ret i64 %result
> +}
> +
> +; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
> +; CHECK: %result = call i64 @llvm.amdgcn.fcmp.f32(float %x, float 4.000000e+00, i32 2)
> +define i64 @fcmp_constant_to_rhs_olt(float %x) {
> + %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
> + ret i64 %result
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
-------------- next part --------------
; ModuleID = '<stdin>'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; Reproducer for the miscompile described in the cover message: running
; `opt -S -instcombine` on this shader (with r297650 applied) deletes the
; bitwise `and` in %endif8 and turns one of the branch conditions into a
; constant. Code below is kept byte-for-byte; only comments are added.
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
; %23/%24: lane index via mbcnt.lo/hi; !0 bounds %24 to [0, 64).
%23 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
%24 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %23) #1, !range !0
; Wave-wide compare of -1 against 0 with predicate 33 (ICMP_NE per the
; encoding used in the quoted patch's tests) — presumably a ballot of the
; active lanes; TODO confirm against the intrinsic docs.
%25 = call i64 @llvm.amdgcn.icmp.i32(i32 -1, i32 0, i32 33) #2
; Split the 64-bit mask into two i32 halves (%27 low, %28 high).
%26 = bitcast i64 %25 to <2 x i32>
%27 = extractelement <2 x i32> %26, i32 0
%28 = extractelement <2 x i32> %26, i32 1
%29 = bitcast i32 %27 to float
%30 = bitcast i32 %28 to float
br label %loop4
; Loop counter %TEMP1.x.0 is an i32 carried as a float bitcast; it counts
; 0..63 (exit when %31 >= 64 in %if6).
loop4: ; preds = %endif35, %if16, %main_body
%TEMP1.x.0 = phi float [ 0.000000e+00, %main_body ], [ %61, %if16 ], [ %75, %endif35 ]
%31 = bitcast float %TEMP1.x.0 to i32
%32 = icmp uge i32 %31, 64
%33 = sext i1 %32 to i32
%34 = bitcast i32 %33 to float
br i1 %32, label %if6, label %endif8
if6: ; preds = %loop4
br label %endloop37
; endif8: the block whose `and` (%50/%51) disappears under the bad fold.
endif8: ; preds = %loop4
; Build the 64-bit single-bit value (1 << %31) through <2 x i32> bitcasts.
%35 = zext i32 %31 to i64
%36 = bitcast i64 %35 to <2 x i32>
%37 = extractelement <2 x i32> %36, i32 0
%38 = extractelement <2 x i32> %36, i32 1
%39 = bitcast i32 %37 to float
%40 = bitcast i32 %38 to float
%41 = insertelement <2 x i32> undef, i32 %37, i32 0
%42 = insertelement <2 x i32> %41, i32 %38, i32 1
%43 = bitcast <2 x i32> %42 to i64
%44 = shl i64 bitcast (<2 x i32> <i32 1, i32 0> to i64), %43
%45 = bitcast i64 %44 to <2 x i32>
%46 = extractelement <2 x i32> %45, i32 0
%47 = extractelement <2 x i32> %45, i32 1
%48 = bitcast i32 %46 to float
%49 = bitcast i32 %47 to float
; The logical and the cover message reports missing after instcombine:
; mask & (1 << %31), done half-by-half on the i32 pieces.
%50 = and i32 %27, %46
%51 = and i32 %28, %47
%52 = bitcast i32 %50 to float
%53 = bitcast i32 %51 to float
%54 = insertelement <2 x i32> undef, i32 %50, i32 0
%55 = insertelement <2 x i32> %54, i32 %51, i32 1
%56 = bitcast <2 x i32> %55 to i64
; %57 feeds the branch that reportedly becomes constant after the fold.
%57 = icmp eq i64 %56, 0
%58 = sext i1 %57 to i32
%59 = bitcast i32 %58 to float
br i1 %57, label %if16, label %endif19
; if16: lane bit not set in the mask — advance the counter and loop.
if16: ; preds = %endif8
%60 = add i32 %31, 1
%61 = bitcast i32 %60 to float
br label %loop4
; endif19: read lane %31's value of %24 and compare with the counter.
endif19: ; preds = %endif8
%62 = bitcast i32 %24 to float
%63 = call i32 @llvm.amdgcn.readlane(i32 %24, i32 %31) #2
%64 = bitcast i32 %63 to float
%65 = icmp ne i32 %63, %31
%66 = sext i1 %65 to i32
%67 = bitcast i32 %66 to float
br i1 %65, label %if22, label %endif35
; if22: mismatch found — convert the ids to [0,1]-ish floats (scale is
; 0x3F70101020000000 ~= 1/255) and exit the loop.
if22: ; preds = %endif19
%68 = uitofp i32 %63 to float
%69 = fmul nsz float %68, 0x3F70101020000000
%70 = uitofp i32 %31 to float
%71 = fmul nsz float %70, 0x3F70101020000000
%72 = uitofp i32 %24 to float
%73 = fmul nsz float %72, 0x3F70101020000000
br label %endloop37
endif35: ; preds = %endif19
%74 = add i32 %31, 1
%75 = bitcast i32 %74 to float
br label %loop4
; endloop37: select the output color depending on which exit was taken,
; then pack the amdgpu_ps export struct.
endloop37: ; preds = %if22, %if6
%OUT0.w.0 = phi float [ 1.000000e+00, %if6 ], [ %73, %if22 ]
%OUT0.z.0 = phi float [ 0.000000e+00, %if6 ], [ %71, %if22 ]
%OUT0.y.0 = phi float [ 1.000000e+00, %if6 ], [ %69, %if22 ]
%OUT0.x.0 = phi float [ 0.000000e+00, %if6 ], [ 1.000000e+00, %if22 ]
%76 = bitcast float %5 to i32
%77 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %76, 10
%78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %77, float %OUT0.x.0, 11
%79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %78, float %OUT0.y.0, 12
%80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %79, float %OUT0.z.0, 13
%81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %80, float %OUT0.w.0, 14
%82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %81, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %82
}
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
; Function Attrs: convergent nounwind readnone
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #2
; Function Attrs: convergent nounwind readnone
declare i32 @llvm.amdgcn.readlane(i32, i32) #2
attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { convergent nounwind readnone }
!0 = !{i32 0, i32 64}
More information about the llvm-commits
mailing list