[llvm] r218101 - Optimize sext/zext insertion algorithm in back-end.
Jiangning Liu
liujiangning1 at gmail.com
Tue Sep 23 20:35:21 PDT 2014
Hi Bob,
Sorry to hear about that. I just committed a fix, r218364, to clear
PreferredExtendType
in the function state info.
Let me know if you still see the memory consumption issue. I tried the original
huge case, and I can't see any memory issue.
Thanks,
-Jiangning
2014-09-24 1:56 GMT+08:00 Bob Wilson <bob.wilson at apple.com>:
> We’re seeing significant regressions in memory usage. I think you forgot
> to clear the PreferredExtendType DenseMap that you added here.
>
> > On Sep 18, 2014, at 10:30 PM, Jiangning Liu <jiangning.liu at arm.com>
> wrote:
> >
> > Author: jiangning
> > Date: Fri Sep 19 00:30:35 2014
> > New Revision: 218101
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=218101&view=rev
> > Log:
> > Optimize sext/zext insertion algorithm in back-end.
> >
> > With this optimization, we no longer always insert zext for values
> crossing
> > basic blocks; instead, we insert sext if the users of a value crossing basic
> blocks
> > prefer a signed predicate.
> >
> >
> > Added:
> > llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll
> > Modified:
> > llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h
> > llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
> > llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
> > llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> > llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
> >
> > Modified: llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h?rev=218101&r1=218100&r2=218101&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h (original)
> > +++ llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h Fri Sep 19
> 00:30:35 2014
> > @@ -21,6 +21,7 @@
> > #include "llvm/ADT/SmallPtrSet.h"
> > #include "llvm/ADT/SmallVector.h"
> > #include "llvm/CodeGen/MachineBasicBlock.h"
> > +#include "llvm/CodeGen/ISDOpcodes.h"
> > #include "llvm/IR/InlineAsm.h"
> > #include "llvm/IR/Instructions.h"
> > #include "llvm/Target/TargetRegisterInfo.h"
> > @@ -106,6 +107,10 @@ public:
> > KnownZero(1, 0) {}
> > };
> >
> > + /// Record the preferred extend type (ISD::SIGN_EXTEND or
> ISD::ZERO_EXTEND)
> > + /// for a value.
> > + DenseMap<const Value *, ISD::NodeType> PreferredExtendType;
> > +
> > /// VisitedBBs - The set of basic blocks visited thus far by
> instruction
> > /// selection.
> > SmallPtrSet<const BasicBlock*, 4> VisitedBBs;
> >
> > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp?rev=218101&r1=218100&r2=218101&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
> (original)
> > +++ llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp Fri Sep
> 19 00:30:35 2014
> > @@ -56,6 +56,28 @@ static bool isUsedOutsideOfDefiningBlock
> > return false;
> > }
> >
> > +static ISD::NodeType getPreferredExtendForValue(const Value *V) {
> > + // For the users of the source value being used for compare
> instruction, if
> > + // the number of signed predicate is greater than unsigned predicate,
> we
> > + // prefer to use SIGN_EXTEND.
> > + //
> > + // With this optimization, we would be able to reduce some redundant
> sign or
> > + // zero extension instruction, and eventually more machine CSE
> opportunities
> > + // can be exposed.
> > + ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
> > + unsigned NumOfSigned = 0, NumOfUnsigned = 0;
> > + for (const User *U : V->users()) {
> > + if (const auto *CI = dyn_cast<CmpInst>(U)) {
> > + NumOfSigned += CI->isSigned();
> > + NumOfUnsigned += CI->isUnsigned();
> > + }
> > + }
> > + if (NumOfSigned > NumOfUnsigned)
> > + ExtendKind = ISD::SIGN_EXTEND;
> > +
> > + return ExtendKind;
> > +}
> > +
> > void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
> > SelectionDAG *DAG) {
> > const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
> > @@ -182,6 +204,9 @@ void FunctionLoweringInfo::set(const Fun
> > }
> > }
> > }
> > +
> > + // Decide the preferred extend type for a value.
> > + PreferredExtendType[I] = getPreferredExtendForValue(I);
> > }
> >
> > // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.
> This
> >
> > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=218101&r1=218100&r2=218101&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
> (original)
> > +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Fri Sep
> 19 00:30:35 2014
> > @@ -862,7 +862,26 @@ void DAGTypeLegalizer::PromoteSetCCOpera
> > switch (CCCode) {
> > default: llvm_unreachable("Unknown integer comparison!");
> > case ISD::SETEQ:
> > - case ISD::SETNE:
> > + case ISD::SETNE: {
> > + SDValue OpL = GetPromotedInteger(NewLHS);
> > + SDValue OpR = GetPromotedInteger(NewRHS);
> > +
> > + // We would prefer to promote the comparison operand with sign
> extension,
> > + // if we find the operand is actually to truncate an AssertSext.
> With this
> > + // optimization, we can avoid inserting real truncate instruction,
> which
> > + // is redundant eventually.
> > + if (OpL->getOpcode() == ISD::AssertSext &&
> > + cast<VTSDNode>(OpL->getOperand(1))->getVT() ==
> NewLHS.getValueType() &&
> > + OpR->getOpcode() == ISD::AssertSext &&
> > + cast<VTSDNode>(OpR->getOperand(1))->getVT() ==
> NewRHS.getValueType()) {
> > + NewLHS = OpL;
> > + NewRHS = OpR;
> > + } else {
> > + NewLHS = ZExtPromotedInteger(NewLHS);
> > + NewRHS = ZExtPromotedInteger(NewRHS);
> > + }
> > + break;
> > + }
> > case ISD::SETUGE:
> > case ISD::SETUGT:
> > case ISD::SETULE:
> >
> > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=218101&r1=218100&r2=218101&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> (original)
> > +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Fri Sep
> 19 00:30:35 2014
> > @@ -646,8 +646,10 @@ namespace {
> > /// specified value into the registers specified by this object.
> This uses
> > /// Chain/Flag as the input and updates them for the output
> Chain/Flag.
> > /// If the Flag pointer is NULL, no flag is used.
> > - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
> > - SDValue &Chain, SDValue *Flag, const Value *V)
> const;
> > + void
> > + getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue
> &Chain,
> > + SDValue *Flag, const Value *V,
> > + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND)
> const;
> >
> > /// AddInlineAsmOperands - Add this value to the specified inlineasm
> node
> > /// operand list. This adds the code marker, matching input operand
> index
> > @@ -762,9 +764,10 @@ SDValue RegsForValue::getCopyFromRegs(Se
> > /// Chain/Flag as the input and updates them for the output Chain/Flag.
> > /// If the Flag pointer is NULL, no flag is used.
> > void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc
> dl,
> > - SDValue &Chain, SDValue *Flag,
> > - const Value *V) const {
> > + SDValue &Chain, SDValue *Flag, const
> Value *V,
> > + ISD::NodeType PreferredExtendType)
> const {
> > const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> > + ISD::NodeType ExtendKind = PreferredExtendType;
> >
> > // Get the list of the values's legal parts.
> > unsigned NumRegs = Regs.size();
> > @@ -773,8 +776,9 @@ void RegsForValue::getCopyToRegs(SDValue
> > EVT ValueVT = ValueVTs[Value];
> > unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
> > MVT RegisterVT = RegVTs[Value];
> > - ISD::NodeType ExtendKind =
> > - TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND:
> ISD::ANY_EXTEND;
> > +
> > + if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val,
> RegisterVT))
> > + ExtendKind = ISD::ZERO_EXTEND;
> >
> > getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
> > &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
> > @@ -7429,7 +7433,12 @@ SelectionDAGBuilder::CopyValueToVirtualR
> > const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
> > RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType());
> > SDValue Chain = DAG.getEntryNode();
> > - RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V);
> > +
> > + ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
> > + FuncInfo.PreferredExtendType.end())
> > + ? ISD::ANY_EXTEND
> > + : FuncInfo.PreferredExtendType[V];
> > + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V,
> ExtendType);
> > PendingExports.push_back(Chain);
> > }
> >
> >
> > Modified: llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll?rev=218101&r1=218100&r2=218101&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll (original)
> > +++ llvm/trunk/test/CodeGen/AArch64/atomic-ops.ll Fri Sep 19 00:30:35
> 2014
> > @@ -509,7 +509,7 @@ define i8 @test_atomic_load_min_i8(i8 %o
> > ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> > ; CHECK-NOT: dmb
> >
> > -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> > +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
> > ret i8 %old
> > }
> >
> > @@ -534,7 +534,7 @@ define i16 @test_atomic_load_min_i16(i16
> > ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> > ; CHECK-NOT: dmb
> >
> > -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> > +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
> > ret i16 %old
> > }
> >
> > @@ -607,7 +607,7 @@ define i8 @test_atomic_load_max_i8(i8 %o
> > ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> > ; CHECK-NOT: dmb
> >
> > -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> > +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
> > ret i8 %old
> > }
> >
> > @@ -632,7 +632,7 @@ define i16 @test_atomic_load_max_i16(i16
> > ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
> > ; CHECK-NOT: dmb
> >
> > -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
> > +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
> > ret i16 %old
> > }
> >
> >
> > Added: llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll?rev=218101&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll (added)
> > +++ llvm/trunk/test/CodeGen/AArch64/rm_redundant_cmp.ll Fri Sep 19
> 00:30:35 2014
> > @@ -0,0 +1,254 @@
> > +; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
> > +
> > +; The following cases are for i16
> > +
> > +%struct.s_signed_i16 = type { i16, i16, i16 }
> > +%struct.s_unsigned_i16 = type { i16, i16, i16 }
> > +
> > +@cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer,
> align 2
> > +@cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer,
> align 2
> > +
> > +define void @test_i16_2cmp_signed_1() {
> > +; CHECK-LABEL: test_i16_2cmp_signed_1
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.gt
> > +; CHECK-NOT: cmp
> > +; CHECK: b.ne
> > +entry:
> > + %0 = load i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 1), align 2
> > + %1 = load i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 2), align 2
> > + %cmp = icmp sgt i16 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp eq i16 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +define void @test_i16_2cmp_signed_2() {
> > +; CHECK-LABEL: test_i16_2cmp_signed_2
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.le
> > +; CHECK-NOT: cmp
> > +; CHECK: b.ge
> > +entry:
> > + %0 = load i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 1), align 2
> > + %1 = load i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 2), align 2
> > + %cmp = icmp sgt i16 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp slt i16 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16*
> @cost_s_i8_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +define void @test_i16_2cmp_unsigned_1() {
> > +; CHECK-LABEL: test_i16_2cmp_unsigned_1
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.hi
> > +; CHECK-NOT: cmp
> > +; CHECK: b.ne
> > +entry:
> > + %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 1), align 2
> > + %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 2), align 2
> > + %cmp = icmp ugt i16 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp eq i16 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +define void @test_i16_2cmp_unsigned_2() {
> > +; CHECK-LABEL: test_i16_2cmp_unsigned_2
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.ls
> > +; CHECK-NOT: cmp
> > +; CHECK: b.hs
> > +entry:
> > + %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 1), align 2
> > + %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 2), align 2
> > + %cmp = icmp ugt i16 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp ult i16 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16*
> @cost_u_i16, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +; The following cases are for i8
> > +
> > +%struct.s_signed_i8 = type { i8, i8, i8 }
> > +%struct.s_unsigned_i8 = type { i8, i8, i8 }
> > +
> > +@cost_s = common global %struct.s_signed_i8 zeroinitializer, align 2
> > +@cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align
> 2
> > +
> > +
> > +define void @test_i8_2cmp_signed_1() {
> > +; CHECK-LABEL: test_i8_2cmp_signed_1
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.gt
> > +; CHECK-NOT: cmp
> > +; CHECK: b.ne
> > +entry:
> > + %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s,
> i64 0, i32 1), align 2
> > + %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s,
> i64 0, i32 2), align 2
> > + %cmp = icmp sgt i8 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8*
> @cost_s, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp eq i8 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8*
> @cost_s, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +define void @test_i8_2cmp_signed_2() {
> > +; CHECK-LABEL: test_i8_2cmp_signed_2
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.le
> > +; CHECK-NOT: cmp
> > +; CHECK: b.ge
> > +entry:
> > + %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s,
> i64 0, i32 1), align 2
> > + %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s,
> i64 0, i32 2), align 2
> > + %cmp = icmp sgt i8 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8*
> @cost_s, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp slt i8 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8*
> @cost_s, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +define void @test_i8_2cmp_unsigned_1() {
> > +; CHECK-LABEL: test_i8_2cmp_unsigned_1
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.hi
> > +; CHECK-NOT: cmp
> > +; CHECK: b.ne
> > +entry:
> > + %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 1), align 2
> > + %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 2), align 2
> > + %cmp = icmp ugt i8 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp eq i8 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +define void @test_i8_2cmp_unsigned_2() {
> > +; CHECK-LABEL: test_i8_2cmp_unsigned_2
> > +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
> > +; CHECK-NEXT: b.ls
> > +; CHECK-NOT: cmp
> > +; CHECK: b.hs
> > +entry:
> > + %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 1), align 2
> > + %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 2), align 2
> > + %cmp = icmp ugt i8 %0, %1
> > + br i1 %cmp, label %if.then, label %if.else
> > +
> > +if.then: ; preds = %entry
> > + store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.else: ; preds = %entry
> > + %cmp5 = icmp ult i8 %0, %1
> > + br i1 %cmp5, label %if.then7, label %if.end8
> > +
> > +if.then7: ; preds = %if.else
> > + store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8*
> @cost_u_i8, i64 0, i32 0), align 2
> > + br label %if.end8
> > +
> > +if.end8: ; preds = %if.else,
> %if.then7, %if.then
> > + ret void
> > +}
> > +
> > +; Make sure the case below won't crash.
> > +
> > +; The optimization of ZERO_EXTEND and SIGN_EXTEND in type legalization
> stage can't assert
> > +; the operand of a set_cc is always a TRUNCATE.
> > +
> > +define i1 @foo(float %inl, float %inr) {
> > + %lval = fptosi float %inl to i8
> > + %rval = fptosi float %inr to i8
> > + %sum = icmp eq i8 %lval, %rval
> > + ret i1 %sum
> > +}
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140924/6edc32c5/attachment.html>
More information about the llvm-commits
mailing list