[llvm] r336509 - [LoopIdiomRecognize] Support for converting loops that use LSHR to CTLZ.
Eric Christopher via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 11 14:54:08 PDT 2018
Hi Craig,
Since we have a testcase here, could we get this reverted until we can get
a fix (unless you'll have one shortly of course :).
Thanks!
-eric
On Wed, Jul 11, 2018 at 1:43 PM Wei Mi via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Hi,
>
> We run into a SEGV in a testcase. Here is a reproducible. Please take a
> look.
>
> ----------- 1.cc -------------
> unsigned foo(unsigned input) {
> unsigned num = 0;
> do {
> ++num;
> input >>= 1;
> } while (input != 0);
> return num;
> }
>
> int main() {
> __builtin_printf("%u\n", foo(0));
> }
> ---------------------------------
>
> ~/workarea/llvm-r336508/rbuild/bin/clang -O2 1.cc -o good.out; ./good.out
> 1
> ~/workarea/llvm-r336509/rbuild/bin/clang -O2 1.cc -o bad.out; ./bad.out
> 0
>
> Thanks,
> Wei.
>
> On Sat, Jul 7, 2018 at 6:45 PM, Craig Topper via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: ctopper
>> Date: Sat Jul 7 18:45:47 2018
>> New Revision: 336509
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=336509&view=rev
>> Log:
>> [LoopIdiomRecognize] Support for converting loops that use LSHR to CTLZ.
>>
>> In the 'detectCTLZIdiom' function support for loops that use LSHR
>> instruction instead of ASHR has been added.
>>
>> This supports creating ctlz from the following code.
>>
>> int lzcnt(int x) {
>> int count = 0;
>> while (x > 0) {
>> count++;
>> x = x >> 1;
>> }
>> return count;
>> }
>>
>> Patch by Olga Moldovanova
>>
>> Differential Revision: https://reviews.llvm.org/D48354
>>
>> Modified:
>> llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>> llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll
>>
>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=336509&r1=336508&r2=336509&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Jul 7
>> 18:45:47 2018
>> @@ -188,8 +188,9 @@ private:
>> PHINode *CntPhi, Value *Var);
>> bool recognizeAndInsertCTLZ();
>> void transformLoopToCountable(BasicBlock *PreCondBB, Instruction
>> *CntInst,
>> - PHINode *CntPhi, Value *Var, const
>> DebugLoc &DL,
>> - bool ZeroCheck, bool
>> IsCntPhiUsedOutsideLoop);
>> + PHINode *CntPhi, Value *Var, Instruction
>> *DefX,
>> + const DebugLoc &DL, bool ZeroCheck,
>> + bool IsCntPhiUsedOutsideLoop);
>>
>> /// @}
>> };
>> @@ -1316,8 +1317,8 @@ static bool detectCTLZIdiom(Loop *CurLoo
>> return false;
>>
>> // step 2: detect instructions corresponding to "x.next = x >> 1"
>> - // TODO: Support loops that use LShr.
>> - if (!DefX || DefX->getOpcode() != Instruction::AShr)
>> + if (!DefX || (DefX->getOpcode() != Instruction::AShr &&
>> + DefX->getOpcode() != Instruction::LShr))
>> return false;
>> ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
>> if (!Shft || !Shft->isOne())
>> @@ -1401,8 +1402,7 @@ bool LoopIdiomRecognize::recognizeAndIns
>>
>> // Make sure the initial value can't be negative otherwise the ashr in
>> the
>> // loop might never reach zero which would make the loop infinite.
>> - // TODO: Support loops that use lshr and wouldn't need this check.
>> - if (!isKnownNonNegative(InitX, *DL))
>> + if (DefX->getOpcode() == Instruction::AShr &&
>> !isKnownNonNegative(InitX, *DL))
>> return false;
>>
>> // If we check X != 0 before entering the loop we don't need a zero
>> @@ -1433,8 +1433,9 @@ bool LoopIdiomRecognize::recognizeAndIns
>> TargetTransformInfo::TCC_Basic)
>> return false;
>>
>> - transformLoopToCountable(PH, CntInst, CntPhi, InitX,
>> DefX->getDebugLoc(),
>> - ZeroCheck, IsCntPhiUsedOutsideLoop);
>> + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX,
>> + DefX->getDebugLoc(), ZeroCheck,
>> + IsCntPhiUsedOutsideLoop);
>> return true;
>> }
>>
>> @@ -1547,7 +1548,8 @@ static CallInst *createCTLZIntrinsic(IRB
>> /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
>> void LoopIdiomRecognize::transformLoopToCountable(
>> BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value
>> *InitX,
>> - const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
>> + Instruction *DefX, const DebugLoc &DL, bool ZeroCheck,
>> + bool IsCntPhiUsedOutsideLoop) {
>> BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
>>
>> // Step 1: Insert the CTLZ instruction at the end of the preheader
>> block
>> @@ -1558,10 +1560,16 @@ void LoopIdiomRecognize::transformLoopTo
>> Builder.SetCurrentDebugLocation(DL);
>> Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
>>
>> - if (IsCntPhiUsedOutsideLoop)
>> - InitXNext = Builder.CreateAShr(InitX,
>> - ConstantInt::get(InitX->getType(),
>> 1));
>> - else
>> + if (IsCntPhiUsedOutsideLoop) {
>> + if (DefX->getOpcode() == Instruction::AShr)
>> + InitXNext =
>> + Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(),
>> 1));
>> + else if (DefX->getOpcode() == Instruction::LShr)
>> + InitXNext =
>> + Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(),
>> 1));
>> + else
>> + llvm_unreachable("Unexpected opcode!");
>> + } else
>> InitXNext = InitX;
>> CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
>> Count = Builder.CreateSub(
>>
>> Modified: llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll?rev=336509&r1=336508&r2=336509&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll (original)
>> +++ llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll Sat Jul 7 18:45:47
>> 2018
>> @@ -119,6 +119,52 @@ while.end:
>> ; Here it will replace the loop -
>> ; assume builtin is always profitable.
>> ;
>> +; int ctlz_zero_check_lshr(int n)
>> +; {
>> +; int i = 0;
>> +; while(n) {
>> +; n >>= 1;
>> +; i++;
>> +; }
>> +; return i;
>> +; }
>> +;
>> +; ALL: entry
>> +; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
>> +; ALL-NEXT: %1 = sub i32 32, %0
>> +; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
>> +; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa,
>> %while.end.loopexit ]
>> +; ALL: ret i32 %i.0.lcssa
>> +
>> +; Function Attrs: norecurse nounwind readnone uwtable
>> +define i32 @ctlz_zero_check_lshr(i32 %n) {
>> +entry:
>> + %tobool4 = icmp eq i32 %n, 0
>> + br i1 %tobool4, label %while.end, label %while.body.preheader
>> +
>> +while.body.preheader: ; preds = %entry
>> + br label %while.body
>> +
>> +while.body: ; preds =
>> %while.body.preheader, %while.body
>> + %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
>> + %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n,
>> %while.body.preheader ]
>> + %shr = lshr i32 %n.addr.05, 1
>> + %inc = add nsw i32 %i.06, 1
>> + %tobool = icmp eq i32 %shr, 0
>> + br i1 %tobool, label %while.end.loopexit, label %while.body
>> +
>> +while.end.loopexit: ; preds = %while.body
>> + br label %while.end
>> +
>> +while.end: ; preds =
>> %while.end.loopexit, %entry
>> + %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
>> + ret i32 %i.0.lcssa
>> +}
>> +
>> +; Recognize CTLZ builtin pattern.
>> +; Here it will replace the loop -
>> +; assume builtin is always profitable.
>> +;
>> ; int ctlz(int n)
>> ; {
>> ; n = n >= 0 ? n : -n;
>> @@ -161,6 +207,44 @@ while.end:
>> ; Here it will replace the loop -
>> ; assume builtin is always profitable.
>> ;
>> +; int ctlz_lshr(int n)
>> +; {
>> +; int i = 0;
>> +; while(n >>= 1) {
>> +; i++;
>> +; }
>> +; return i;
>> +; }
>> +;
>> +; ALL: entry
>> +; ALL: %0 = lshr i32 %n, 1
>> +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
>> +; ALL-NEXT: %2 = sub i32 32, %1
>> +; ALL-NEXT: %3 = add i32 %2, 1
>> +; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
>> +; ALL: ret i32 %i.0.lcssa
>> +
>> +; Function Attrs: norecurse nounwind readnone uwtable
>> +define i32 @ctlz_lshr(i32 %n) {
>> +entry:
>> + br label %while.cond
>> +
>> +while.cond: ; preds = %while.cond,
>> %entry
>> + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
>> + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
>> + %shr = lshr i32 %n.addr.0, 1
>> + %tobool = icmp eq i32 %shr, 0
>> + %inc = add nsw i32 %i.0, 1
>> + br i1 %tobool, label %while.end, label %while.cond
>> +
>> +while.end: ; preds = %while.cond
>> + ret i32 %i.0
>> +}
>> +
>> +; Recognize CTLZ builtin pattern.
>> +; Here it will replace the loop -
>> +; assume builtin is always profitable.
>> +;
>> ; int ctlz_add(int n, int i0)
>> ; {
>> ; n = n >= 0 ? n : -n;
>> @@ -204,6 +288,45 @@ while.end:
>> ; Here it will replace the loop -
>> ; assume builtin is always profitable.
>> ;
>> +; int ctlz_add_lshr(int n, int i0)
>> +; {
>> +; int i = i0;
>> +; while(n >>= 1) {
>> +; i++;
>> +; }
>> +; return i;
>> +; }
>> +;
>> +; ALL: entry
>> +; ALL: %0 = lshr i32 %n, 1
>> +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
>> +; ALL-NEXT: %2 = sub i32 32, %1
>> +; ALL-NEXT: %3 = add i32 %2, 1
>> +; ALL-NEXT: %4 = add i32 %2, %i0
>> +; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
>> +; ALL: ret i32 %i.0.lcssa
>> +;
>> +; Function Attrs: norecurse nounwind readnone uwtable
>> +define i32 @ctlz_add_lshr(i32 %n, i32 %i0) {
>> +entry:
>> + br label %while.cond
>> +
>> +while.cond: ; preds = %while.cond,
>> %entry
>> + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
>> + %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
>> + %shr = lshr i32 %n.addr.0, 1
>> + %tobool = icmp eq i32 %shr, 0
>> + %inc = add nsw i32 %i.0, 1
>> + br i1 %tobool, label %while.end, label %while.cond
>> +
>> +while.end: ; preds = %while.cond
>> + ret i32 %i.0
>> +}
>> +
>> +; Recognize CTLZ builtin pattern.
>> +; Here it will replace the loop -
>> +; assume builtin is always profitable.
>> +;
>> ; int ctlz_sext(short in)
>> ; {
>> ; int n = in;
>> @@ -240,6 +363,45 @@ while.cond:
>> %tobool = icmp eq i32 %shr, 0
>> %inc = add nsw i32 %i.0, 1
>> br i1 %tobool, label %while.end, label %while.cond
>> +
>> +while.end: ; preds = %while.cond
>> + ret i32 %i.0
>> +}
>> +
>> +; Recognize CTLZ builtin pattern.
>> +; Here it will replace the loop -
>> +; assume builtin is always profitable.
>> +;
>> +; int ctlz_sext_lshr(short in)
>> +; {
>> +; int i = 0;
>> +; while(in >>= 1) {
>> +; i++;
>> +; }
>> +; return i;
>> +; }
>> +;
>> +; ALL: entry
>> +; ALL: %0 = lshr i32 %n, 1
>> +; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
>> +; ALL-NEXT: %2 = sub i32 32, %1
>> +; ALL-NEXT: %3 = add i32 %2, 1
>> +; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
>> +; ALL: ret i32 %i.0.lcssa
>> +
>> +; Function Attrs: norecurse nounwind readnone uwtable
>> +define i32 @ctlz_sext_lshr(i16 %in) {
>> +entry:
>> + %n = sext i16 %in to i32
>> + br label %while.cond
>> +
>> +while.cond: ; preds = %while.cond,
>> %entry
>> + %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
>> + %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
>> + %shr = lshr i32 %n.addr.0, 1
>> + %tobool = icmp eq i32 %shr, 0
>> + %inc = add nsw i32 %i.0, 1
>> + br i1 %tobool, label %while.end, label %while.cond
>>
>> while.end: ; preds = %while.cond
>> ret i32 %i.0
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180711/9a141021/attachment.html>
More information about the llvm-commits
mailing list