[llvm] r216066 - Optimize ZERO_EXTEND and SIGN_EXTEND in both SelectionDAG Builder and type
Rafael Espíndola
rafael.espindola at gmail.com
Wed Aug 20 15:41:23 PDT 2014
I am seeing some massive slowdowns (an infinite loop?) in an LTO build of
clang after this.
With r216065, running llc (Release+Asserts) on the combined optimized
module takes 543.097 seconds. With this revision (r216066) I killed it
after 39 minutes :-(
Would you mind reverting this for now? I will try to put the .bc file
somewhere you can access.
On 20 August 2014 08:05, Jiangning Liu <jiangning.liu at arm.com> wrote:
> Author: jiangning
> Date: Wed Aug 20 07:05:15 2014
> New Revision: 216066
>
> URL: http://llvm.org/viewvc/llvm-project?rev=216066&view=rev
> Log:
> Optimize ZERO_EXTEND and SIGN_EXTEND in both SelectionDAG Builder and type
> legalization stage. With these two optimizations, fewer sign/zero extension
> instructions are inserted, which exposes more opportunities to the Machine
> CSE pass in the back-end.
>
>
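For reference, here is a minimal C++ sketch (hypothetical names, roughly the
pattern the new rm_redundant_cmp.ll test exercises) of the kind of source the
change targets: both branches compare the same two i16 fields, so once the
operands are sign-extended for the first compare, the second compare can reuse
them instead of re-extending:

    // Hypothetical example: both branches compare the same two i16 fields.
    // The first compare is signed (sgt) and the second is equality, so with
    // this patch the operands are sign-extended once and the second cmp can
    // be CSE'd away instead of re-extending.
    struct S { short a, b, c; };

    void pick(S &s) {
      if (s.b > s.c)        // icmp sgt i16
        s.a = s.b;
      else if (s.b == s.c)  // icmp eq i16
        s.a = s.b;
    }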
> Added:
> llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=216066&r1=216065&r2=216066&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Wed Aug 20 07:05:15 2014
> @@ -861,7 +861,28 @@ void DAGTypeLegalizer::PromoteSetCCOpera
> switch (CCCode) {
> default: llvm_unreachable("Unknown integer comparison!");
> case ISD::SETEQ:
> - case ISD::SETNE:
> + case ISD::SETNE: {
> + SDValue OpL, OpR;
> +
> + OpL = GetPromotedInteger(NewLHS);
> + OpR = GetPromotedInteger(NewRHS);
> +
> +      // We prefer to promote the comparison operands with sign extension
> +      // if we find that each operand is a truncate of an AssertSext node.
> +      // This lets us avoid inserting a real truncate instruction, which
> +      // would eventually be redundant.
> + if (OpL->getOpcode() == ISD::AssertSext &&
> + cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
> + OpR->getOpcode() == ISD::AssertSext &&
> + cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
> + NewLHS = OpL;
> + NewRHS = OpR;
> + } else {
> + NewLHS = ZExtPromotedInteger(NewLHS);
> + NewRHS = ZExtPromotedInteger(NewRHS);
> + }
> + break;
> + }
> case ISD::SETUGE:
> case ISD::SETUGT:
> case ISD::SETULE:
>
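To illustrate the legalization change above: an eq/ne compare is insensitive
to how the narrow operands were widened, as long as both sides were widened
the same way. A standalone toy model of the choice being made (plain C++, not
the LLVM API; all names here are invented):

    #include <cstdint>

    enum class Ext { None, SignFrom8 };

    // A promoted i8 value: the widened bits plus what we know about them.
    struct Promoted { int32_t wide; Ext known; };

    static bool equalAfterPromotion(Promoted l, Promoted r) {
      // If both operands are already known sign-extended from i8 (the
      // AssertSext case), compare the wide values directly.
      if (l.known == Ext::SignFrom8 && r.known == Ext::SignFrom8)
        return l.wide == r.wide;
      // Otherwise fall back to zero-extension: masking to the low 8 bits
      // also preserves i8 equality.
      return uint8_t(l.wide) == uint8_t(r.wide);
    }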
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=216066&r1=216065&r2=216066&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Aug 20 07:05:15 2014
> @@ -757,6 +757,28 @@ SDValue RegsForValue::getCopyFromRegs(Se
> return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
> }
>
> +static ISD::NodeType getPreferredExtendForValue(const Value *V) {
> +  // Look at the users of the source value: if compare instructions with
> +  // signed predicates outnumber those with unsigned predicates, prefer
> +  // SIGN_EXTEND.
> +  //
> +  // With this optimization we can remove some redundant sign and zero
> +  // extension instructions, eventually exposing more opportunities to the
> +  // machine CSE pass.
> + ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
> + unsigned int NumOfSigned = 0, NumOfUnsigned = 0;
> + for (const User *U : V->users()) {
> + if (const CmpInst *CI = dyn_cast<CmpInst>(U)) {
> + NumOfSigned += CI->isSigned();
> + NumOfUnsigned += CI->isUnsigned();
> + }
> + }
> + if (NumOfSigned > NumOfUnsigned)
> + ExtendKind = ISD::SIGN_EXTEND;
> +
> + return ExtendKind;
> +}
> +
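The heuristic in getPreferredExtendForValue is a simple vote over the value's
compare users. A self-contained sketch of the same logic (invented names, not
LLVM code); note that equality compares count as neither signed nor unsigned,
matching CmpInst::isSigned()/isUnsigned():

    #include <vector>

    enum class Pred { SignedLT, SignedGT, UnsignedLT, UnsignedGT, Eq };
    enum class Extend { Any, Sign };

    // Prefer sign extension only when signed compare users strictly
    // outnumber unsigned ones; otherwise leave the choice open (ANY_EXTEND).
    static Extend preferredExtend(const std::vector<Pred> &userPreds) {
      unsigned numSigned = 0, numUnsigned = 0;
      for (Pred p : userPreds) {
        numSigned   += (p == Pred::SignedLT || p == Pred::SignedGT);
        numUnsigned += (p == Pred::UnsignedLT || p == Pred::UnsignedGT);
      }
      return numSigned > numUnsigned ? Extend::Sign : Extend::Any;
    }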
> /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
> /// specified value into the registers specified by this object. This uses
> /// Chain/Flag as the input and updates them for the output Chain/Flag.
> @@ -765,6 +787,7 @@ void RegsForValue::getCopyToRegs(SDValue
> SDValue &Chain, SDValue *Flag,
> const Value *V) const {
> const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> + ISD::NodeType ExtendKind = getPreferredExtendForValue(V);
>
>    // Get the list of the value's legal parts.
> unsigned NumRegs = Regs.size();
> @@ -773,8 +796,9 @@ void RegsForValue::getCopyToRegs(SDValue
> EVT ValueVT = ValueVTs[Value];
> unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
> MVT RegisterVT = RegVTs[Value];
> - ISD::NodeType ExtendKind =
> - TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
> +
> + if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
> + ExtendKind = ISD::ZERO_EXTEND;
>
> getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
> &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
>
> Modified: llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll?rev=216066&r1=216065&r2=216066&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll Wed Aug 20 07:05:15 2014
> @@ -493,6 +493,7 @@ define i8 @test_atomic_load_min_i8(i8 %o
> ; CHECK-LABEL: test_atomic_load_min_i8:
> %old = atomicrmw min i8* @var8, i8 %offset acquire
> ; CHECK-NOT: dmb
> +; CHECK: sxtb w[[TMP:[0-9]+]], w0
> ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
> ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
>
> @@ -502,14 +503,13 @@ define i8 @test_atomic_load_min_i8(i8 %o
> ; function there.
>
> ; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
> -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb
> -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
> +; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]]
> +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], le
>
> ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
> ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> ; CHECK-NOT: dmb
>
> -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> ret i8 %old
> }
>
> @@ -517,6 +517,7 @@ define i16 @test_atomic_load_min_i16(i16
> ; CHECK-LABEL: test_atomic_load_min_i16:
> %old = atomicrmw min i16* @var16, i16 %offset release
> ; CHECK-NOT: dmb
> +; CHECK: sxth w[[TMP:[0-9]+]], w0
> ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
> ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
>
> @@ -526,15 +527,14 @@ define i16 @test_atomic_load_min_i16(i16
> ; function there.
>
> ; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
> -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth
> -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
> +; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]]
> +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], le
>
>
> ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
> ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> ; CHECK-NOT: dmb
>
> -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> ret i16 %old
> }
>
> @@ -590,6 +590,7 @@ define i8 @test_atomic_load_max_i8(i8 %o
> ; CHECK-LABEL: test_atomic_load_max_i8:
> %old = atomicrmw max i8* @var8, i8 %offset seq_cst
> ; CHECK-NOT: dmb
> +; CHECK: sxtb w[[TMP:[0-9]+]], w0
> ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
> ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
>
> @@ -599,15 +600,14 @@ define i8 @test_atomic_load_max_i8(i8 %o
> ; function there.
>
> ; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
> -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb
> -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
> +; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]]
> +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], gt
>
>
> ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
> ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> ; CHECK-NOT: dmb
>
> -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> ret i8 %old
> }
>
> @@ -615,6 +615,7 @@ define i16 @test_atomic_load_max_i16(i16
> ; CHECK-LABEL: test_atomic_load_max_i16:
> %old = atomicrmw max i16* @var16, i16 %offset acquire
> ; CHECK-NOT: dmb
> +; CHECK: sxth w[[TMP:[0-9]+]], w0
> ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
> ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
>
> @@ -624,15 +625,14 @@ define i16 @test_atomic_load_max_i16(i16
> ; function there.
>
> ; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
> -; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth
> -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
> +; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]]
> +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], gt
>
>
> ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
> ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> ; CHECK-NOT: dmb
>
> -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> ret i16 %old
> }
>
>
> Added: llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll?rev=216066&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll Wed Aug 20 07:05:15 2014
> @@ -0,0 +1,254 @@
> +; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
> +
> +; The following cases are for i16
> +
> +%struct.s_signed_i16 = type { i16, i16, i16 }
> +%struct.s_unsigned_i16 = type { i16, i16, i16 }
> +
> + at cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer, align 2
> + at cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer, align 2
> +
> +define void @test_i16_2cmp_signed_1() {
> +; CHECK-LABEL: test_i16_2cmp_signed_1
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.gt
> +; CHECK-NOT: cmp
> +; CHECK: b.ne
> +entry:
> + %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
> + %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
> + %cmp = icmp sgt i16 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp eq i16 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +define void @test_i16_2cmp_signed_2() {
> +; CHECK-LABEL: test_i16_2cmp_signed_2
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.le
> +; CHECK-NOT: cmp
> +; CHECK: b.ge
> +entry:
> + %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
> + %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
> + %cmp = icmp sgt i16 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp slt i16 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +define void @test_i16_2cmp_unsigned_1() {
> +; CHECK-LABEL: test_i16_2cmp_unsigned_1
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.hi
> +; CHECK-NOT: cmp
> +; CHECK: b.ne
> +entry:
> + %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
> + %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
> + %cmp = icmp ugt i16 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp eq i16 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +define void @test_i16_2cmp_unsigned_2() {
> +; CHECK-LABEL: test_i16_2cmp_unsigned_2
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.ls
> +; CHECK-NOT: cmp
> +; CHECK: b.hs
> +entry:
> + %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
> + %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
> + %cmp = icmp ugt i16 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp ult i16 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +; The following cases are for i8
> +
> +%struct.s_signed_i8 = type { i8, i8, i8 }
> +%struct.s_unsigned_i8 = type { i8, i8, i8 }
> +
> + at cost_s = common global %struct.s_signed_i8 zeroinitializer, align 2
> + at cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align 2
> +
> +
> +define void @test_i8_2cmp_signed_1() {
> +; CHECK-LABEL: test_i8_2cmp_signed_1
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.gt
> +; CHECK-NOT: cmp
> +; CHECK: b.ne
> +entry:
> + %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
> + %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
> + %cmp = icmp sgt i8 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp eq i8 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +define void @test_i8_2cmp_signed_2() {
> +; CHECK-LABEL: test_i8_2cmp_signed_2
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.le
> +; CHECK-NOT: cmp
> +; CHECK: b.ge
> +entry:
> + %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
> + %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
> + %cmp = icmp sgt i8 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp slt i8 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +define void @test_i8_2cmp_unsigned_1() {
> +; CHECK-LABEL: test_i8_2cmp_unsigned_1
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.hi
> +; CHECK-NOT: cmp
> +; CHECK: b.ne
> +entry:
> + %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
> + %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
> + %cmp = icmp ugt i8 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp eq i8 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +define void @test_i8_2cmp_unsigned_2() {
> +; CHECK-LABEL: test_i8_2cmp_unsigned_2
> +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> +; CHECK-NEXT: b.ls
> +; CHECK-NOT: cmp
> +; CHECK: b.hs
> +entry:
> + %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
> + %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
> + %cmp = icmp ugt i8 %0, %1
> + br i1 %cmp, label %if.then, label %if.else
> +
> +if.then: ; preds = %entry
> + store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.else: ; preds = %entry
> + %cmp5 = icmp ult i8 %0, %1
> + br i1 %cmp5, label %if.then7, label %if.end8
> +
> +if.then7: ; preds = %if.else
> + store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
> + br label %if.end8
> +
> +if.end8: ; preds = %if.else, %if.then7, %if.then
> + ret void
> +}
> +
> +; Make sure the case below does not crash.
> +
> +; The ZERO_EXTEND/SIGN_EXTEND optimization in the type legalization stage
> +; must not assume that the operand of a setcc is always a TRUNCATE.
> +
> +define i1 @foo(float %inl, float %inr) {
> + %lval = fptosi float %inl to i8
> + %rval = fptosi float %inr to i8
> + %sum = icmp eq i8 %lval, %rval
> + ret i1 %sum
> +}
>
>