[llvm] r347191 - [ARM] Remove trunc sinks in ARM CGP

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 19 03:34:40 PST 2018


Author: sam_parker
Date: Mon Nov 19 03:34:40 2018
New Revision: 347191

URL: http://llvm.org/viewvc/llvm-project?rev=347191&view=rev
Log:
[ARM] Remove trunc sinks in ARM CGP
    
Truncs are treated as sources if they produce a value of the same
type as the one we are currently trying to promote. Truncs used to be
considered as a sink if their operand was the same value type.
    
We now allow smaller types in the search, so we should search through
truncs that produce a smaller value. These truncs can then be
converted to an AND mask.
    
This leaves sinks as being:
  - points where the value in the register is being observed, such as
    an icmp, switch or store.
  - points where value types have to match, such as calls and returns.
  - zext are included to ease the transformation and are generally
    removed later on.
    
During this change, it also became apparent that truncating sinks was
broken: if a sink used a source, its type information had already
been lost by the time the truncation happened. So I've changed the
method of caching the type information.

Differential Revision: https://reviews.llvm.org/D54515

Added:
    llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll
    llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll
    llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll

Modified: llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp?rev=347191&r1=347190&r2=347191&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp Mon Nov 19 03:34:40 2018
@@ -109,24 +109,25 @@ EnableDSPWithImms("arm-enable-scalar-dsp
 namespace {
 class IRPromoter {
   SmallPtrSet<Value*, 8> NewInsts;
-  SmallVector<Instruction*, 4> InstsToRemove;
-  DenseMap<Value*, Type*> TruncTysMap;
+  SmallPtrSet<Instruction*, 4> InstsToRemove;
+  DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
   SmallPtrSet<Value*, 8> Promoted;
   Module *M = nullptr;
   LLVMContext &Ctx;
   IntegerType *ExtTy = nullptr;
   IntegerType *OrigTy = nullptr;
-
-  void PrepareConstants(SmallPtrSetImpl<Value*> &Visited,
-                         SmallPtrSetImpl<Instruction*> &SafeToPromote);
-  void ExtendSources(SmallPtrSetImpl<Value*> &Sources);
-  void PromoteTree(SmallPtrSetImpl<Value*> &Visited,
-                   SmallPtrSetImpl<Value*> &Sources,
-                   SmallPtrSetImpl<Instruction*> &Sinks,
-                   SmallPtrSetImpl<Instruction*> &SafeToPromote);
-  void TruncateSinks(SmallPtrSetImpl<Value*> &Sources,
-                     SmallPtrSetImpl<Instruction*> &Sinks);
-  void Cleanup(SmallPtrSetImpl<Value*> &Visited);
+  SmallPtrSetImpl<Value*> *Visited;
+  SmallPtrSetImpl<Value*> *Sources;
+  SmallPtrSetImpl<Instruction*> *Sinks;
+  SmallPtrSetImpl<Instruction*> *SafeToPromote;
+
+  void ReplaceAllUsersOfWith(Value *From, Value *To);
+  void PrepareConstants(void);
+  void ExtendSources(void);
+  void ConvertTruncs(void);
+  void PromoteTree(void);
+  void TruncateSinks(void);
+  void Cleanup(void);
 
 public:
   IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
@@ -192,6 +193,10 @@ static bool GreaterThanTypeSize(Value *V
   return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
 }
 
+static bool LessThanTypeSize(Value *V) {
+  return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize;
+}
+
 /// Some instructions can use 8- and 16-bit operands, and we don't need to
 /// promote anything larger. We disallow booleans to make life easier when
 /// dealing with icmps but allow any other integer that is <= 16 bits. Void
@@ -214,7 +219,7 @@ static bool isSupportedType(Value *V) {
 }
 
 /// Return true if the given value is a source in the use-def chain, producing
-/// a narrow (i8, i16) value. These values will be zext to start the promotion
+/// a narrow 'TypeSize' value. These values will be zext to start the promotion
 /// of the tree to i32. We guarantee that these won't populate the upper bits
 /// of the register. ZExt on the loads will be free, and the same for call
 /// return values because we only accept ones that guarantee a zeroext ret val.
@@ -246,16 +251,22 @@ static bool isSink(Value *V) {
   // proved that the data value is kept within the range of the original data
   // type.
 
+  // Sinks are:
+  // - points where the value in the register is being observed, such as an
+  //   icmp, switch or store.
+  // - points where value types have to match, such as calls and returns.
+  // - zext are included to ease the transformation and are generally removed
+  //   later on.
   if (auto *Store = dyn_cast<StoreInst>(V))
     return LessOrEqualTypeSize(Store->getValueOperand());
   if (auto *Return = dyn_cast<ReturnInst>(V))
     return LessOrEqualTypeSize(Return->getReturnValue());
-  if (auto *Trunc = dyn_cast<TruncInst>(V))
-    return EqualTypeSize(Trunc->getOperand(0));
   if (auto *ZExt = dyn_cast<ZExtInst>(V))
     return GreaterThanTypeSize(ZExt);
+  if (auto *Switch = dyn_cast<SwitchInst>(V))
+    return LessThanTypeSize(Switch->getCondition());
   if (auto *ICmp = dyn_cast<ICmpInst>(V))
-    return ICmp->isSigned();
+    return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
 
   return isa<CallInst>(V);
 }
@@ -426,23 +437,32 @@ static Intrinsic::ID getNarrowIntrinsic(
   llvm_unreachable("unhandled opcode for narrow intrinsic");
 }
 
-static void ReplaceAllUsersOfWith(Value *From, Value *To) {
+void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
   SmallVector<Instruction*, 4> Users;
   Instruction *InstTo = dyn_cast<Instruction>(To);
+  bool ReplacedAll = true;
+
+  LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
+             << "\n");
+
   for (Use &U : From->uses()) {
     auto *User = cast<Instruction>(U.getUser());
-    if (InstTo && User->isIdenticalTo(InstTo))
+    if (InstTo && User->isIdenticalTo(InstTo)) {
+      ReplacedAll = false;
       continue;
+    }
     Users.push_back(User);
   }
 
   for (auto *U : Users)
     U->replaceUsesOfWith(From, To);
+
+  if (ReplacedAll)
+    if (auto *I = dyn_cast<Instruction>(From))
+      InstsToRemove.insert(I);
 }
 
-void
-IRPromoter::PrepareConstants(SmallPtrSetImpl<Value*> &Visited,
-                             SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+void IRPromoter::PrepareConstants() {
   IRBuilder<> Builder{Ctx};
   // First step is to prepare the instructions for mutation. Most constants
   // just need to be zero extended into their new type, but complications arise
@@ -463,12 +483,12 @@ IRPromoter::PrepareConstants(SmallPtrSet
   // immediate as operand 1, we create an equivalent instruction using a
   // positive immediate. That positive immediate can then be zext along with
   // all the other immediates later.
-  for (auto *V : Visited) {
+  for (auto *V : *Visited) {
     if (!isa<Instruction>(V))
       continue;
 
     auto *I = cast<Instruction>(V);
-    if (SafeToPromote.count(I)) {
+    if (SafeToPromote->count(I)) {
 
       if (!isa<OverflowingBinaryOperator>(I))
         continue;
@@ -493,16 +513,16 @@ IRPromoter::PrepareConstants(SmallPtrSet
           NewInst->copyIRFlags(I);
           NewInsts.insert(NewInst);
         }
-        InstsToRemove.push_back(I);
+        InstsToRemove.insert(I);
         I->replaceAllUsesWith(NewVal);
       }
     }
   }
   for (auto *I : NewInsts)
-    Visited.insert(I);
+    Visited->insert(I);
 }
 
-void IRPromoter::ExtendSources(SmallPtrSetImpl<Value*> &Sources) {
+void IRPromoter::ExtendSources() {
   IRBuilder<> Builder{Ctx};
 
   auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
@@ -520,13 +540,13 @@ void IRPromoter::ExtendSources(SmallPtrS
         I->moveAfter(InsertPt);
       NewInsts.insert(I);
     }
+
     ReplaceAllUsersOfWith(V, ZExt);
-    TruncTysMap[ZExt] = TruncTysMap[V];
   };
 
   // Now, insert extending instructions between the sources and their users.
   LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
-  for (auto V : Sources) {
+  for (auto V : *Sources) {
     LLVM_DEBUG(dbgs() << " - " << *V << "\n");
     if (auto *I = dyn_cast<Instruction>(V))
       InsertZExt(I, I);
@@ -540,22 +560,19 @@ void IRPromoter::ExtendSources(SmallPtrS
   }
 }
 
-void IRPromoter::PromoteTree(SmallPtrSetImpl<Value*> &Visited,
-                             SmallPtrSetImpl<Value*> &Sources,
-                             SmallPtrSetImpl<Instruction*> &Sinks,
-                             SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+void IRPromoter::PromoteTree() {
   LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
 
   IRBuilder<> Builder{Ctx};
 
   // Mutate the types of the instructions within the tree. Here we handle
   // constant operands.
-  for (auto *V : Visited) {
-    if (Sources.count(V))
+  for (auto *V : *Visited) {
+    if (Sources->count(V))
       continue;
 
     auto *I = cast<Instruction>(V);
-    if (Sinks.count(I))
+    if (Sinks->count(I))
       continue;
 
     for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
@@ -578,15 +595,15 @@ void IRPromoter::PromoteTree(SmallPtrSet
 
   // Finally, any instructions that should be promoted but haven't yet been,
   // need to be handled using intrinsics.
-  for (auto *V : Visited) {
+  for (auto *V : *Visited) {
     auto *I = dyn_cast<Instruction>(V);
     if (!I)
       continue;
 
-    if (Sources.count(I) || Sinks.count(I))
+    if (Sources->count(I) || Sinks->count(I))
       continue;
 
-    if (!shouldPromote(I) || SafeToPromote.count(I) || NewInsts.count(I))
+    if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
       continue;
   
     assert(EnableDSP && "DSP intrinisc insertion not enabled!");
@@ -600,29 +617,21 @@ void IRPromoter::PromoteTree(SmallPtrSet
     Builder.SetCurrentDebugLocation(I->getDebugLoc());
     Value *Args[] = { I->getOperand(0), I->getOperand(1) };
     CallInst *Call = Builder.CreateCall(DSPInst, Args);
-    ReplaceAllUsersOfWith(I, Call);
-    InstsToRemove.push_back(I);
     NewInsts.insert(Call);
-    TruncTysMap[Call] = OrigTy;
+    ReplaceAllUsersOfWith(I, Call);
   }
 }
 
-void IRPromoter::TruncateSinks(SmallPtrSetImpl<Value*> &Sources,
-                               SmallPtrSetImpl<Instruction*> &Sinks) {
+void IRPromoter::TruncateSinks() {
   LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
 
   IRBuilder<> Builder{Ctx};
 
-  auto InsertTrunc = [&](Value *V) -> Instruction* {
+  auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
     if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
       return nullptr;
 
-    if ((!Promoted.count(V) && !NewInsts.count(V)) || !TruncTysMap.count(V) ||
-        Sources.count(V))
-      return nullptr;
-
-    Type *TruncTy = TruncTysMap[V];
-    if (TruncTy == ExtTy)
+    if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V))
       return nullptr;
 
     LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
@@ -636,14 +645,15 @@ void IRPromoter::TruncateSinks(SmallPtrS
 
   // Fix up any stores or returns that use the results of the promoted
   // chain.
-  for (auto I : Sinks) {
-    LLVM_DEBUG(dbgs() << " - " << *I << "\n");
+  for (auto I : *Sinks) {
+    LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
 
     // Handle calls separately as we need to iterate over arg operands.
     if (auto *Call = dyn_cast<CallInst>(I)) {
       for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
         Value *Arg = Call->getArgOperand(i);
-        if (Instruction *Trunc = InsertTrunc(Arg)) {
+        Type *Ty = TruncTysMap[Call][i];
+        if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
           Trunc->moveBefore(Call);
           Call->setArgOperand(i, Trunc);
         }
@@ -651,9 +661,20 @@ void IRPromoter::TruncateSinks(SmallPtrS
       continue;
     }
 
+    // Special case switches because we need to truncate the condition.
+    if (auto *Switch = dyn_cast<SwitchInst>(I)) {
+      Type *Ty = TruncTysMap[Switch][0];
+      if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
+        Trunc->moveBefore(Switch);
+        Switch->setCondition(Trunc);
+      }
+      continue;
+    }
+
     // Now handle the others.
     for (unsigned i = 0; i < I->getNumOperands(); ++i) {
-      if (Instruction *Trunc = InsertTrunc(I->getOperand(i))) {
+      Type *Ty = TruncTysMap[I][i];
+      if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
         Trunc->moveBefore(I);
         I->setOperand(i, Trunc);
       }
@@ -661,35 +682,32 @@ void IRPromoter::TruncateSinks(SmallPtrS
   }
 }
 
-void IRPromoter::Cleanup(SmallPtrSetImpl<Value*> &Visited) {
+void IRPromoter::Cleanup() {
   // Some zexts will now have become redundant, along with their trunc
   // operands, so remove them
-  for (auto V : Visited) {
-    if (!isa<ZExtInst>(V))
+  for (auto V : *Visited) {
+    if (!isa<CastInst>(V))
       continue;
 
-    auto ZExt = cast<ZExtInst>(V);
+    auto ZExt = cast<CastInst>(V);
     if (ZExt->getDestTy() != ExtTy)
       continue;
 
     Value *Src = ZExt->getOperand(0);
     if (ZExt->getSrcTy() == ZExt->getDestTy()) {
-      LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast.\n");
+      LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
+                 << "\n");
       ReplaceAllUsersOfWith(ZExt, Src);
-      InstsToRemove.push_back(ZExt);
       continue;
     }
 
     // For any truncs that we insert to handle zexts, we can replace the
     // result of the zext with the input to the trunc.
-    if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
+    if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) {
       auto *Trunc = cast<TruncInst>(Src);
       assert(Trunc->getOperand(0)->getType() == ExtTy &&
              "expected inserted trunc to be operating on i32");
-      LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: "
-                 << *Trunc->getOperand(0));
       ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
-      InstsToRemove.push_back(ZExt);
     }
   }
 
@@ -705,6 +723,29 @@ void IRPromoter::Cleanup(SmallPtrSetImpl
   Promoted.clear();
 }
 
+void IRPromoter::ConvertTruncs() {
+  IRBuilder<> Builder{Ctx};
+
+  for (auto *V : *Visited) {
+    if (!isa<TruncInst>(V) || Sources->count(V))
+      continue;
+
+    auto *Trunc = cast<TruncInst>(V);
+    assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
+
+    Builder.SetInsertPoint(Trunc);
+    unsigned NumBits =
+      cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
+    ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+    Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
+
+    if (auto *I = dyn_cast<Instruction>(Masked))
+      NewInsts.insert(I);
+
+    ReplaceAllUsersOfWith(Trunc, Masked);
+  }
+}
+
 void IRPromoter::Mutate(Type *OrigTy,
                         SmallPtrSetImpl<Value*> &Visited,
                         SmallPtrSetImpl<Value*> &Sources,
@@ -718,28 +759,47 @@ void IRPromoter::Mutate(Type *OrigTy,
   assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
          "original type not smaller than extended type");
 
-  // Cache original types.
-  for (auto *V : Visited)
-    TruncTysMap[V] = V->getType();
+  this->Visited = &Visited;
+  this->Sources = &Sources;
+  this->Sinks = &Sinks;
+  this->SafeToPromote = &SafeToPromote;
+
+  // Cache original types of the values that will likely need truncating
+  for (auto *I : Sinks) {
+    if (auto *Call = dyn_cast<CallInst>(I)) {
+      for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+        Value *Arg = Call->getArgOperand(i);
+        TruncTysMap[Call].push_back(Arg->getType());
+      }
+    } else if (auto *Switch = dyn_cast<SwitchInst>(I))
+      TruncTysMap[I].push_back(Switch->getCondition()->getType());
+    else {
+      for (unsigned i = 0; i < I->getNumOperands(); ++i)
+        TruncTysMap[I].push_back(I->getOperand(i)->getType());
+    }
+  }
 
   // Convert adds and subs using negative immediates to equivalent instructions
   // that use positive constants.
-  PrepareConstants(Visited, SafeToPromote);
+  PrepareConstants();
 
   // Insert zext instructions between sources and their users.
-  ExtendSources(Sources);
+  ExtendSources();
+
+  // Convert any truncs, that aren't sources, into AND masks.
+  ConvertTruncs();
 
   // Promote visited instructions, mutating their types in place. Also insert
   // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
   // promote.
-  PromoteTree(Visited, Sources, Sinks, SafeToPromote);
+  PromoteTree();
 
   // Insert trunc instructions for use by calls, stores etc...
-  TruncateSinks(Sources, Sinks);
+  TruncateSinks();
 
   // Finally, remove unecessary zexts and truncs, delete old instructions and
   // clear the data structures.
-  Cleanup(Visited);
+  Cleanup();
 
   LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
 }

Modified: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll?rev=347191&r1=347190&r2=347191&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll Mon Nov 19 03:34:40 2018
@@ -200,11 +200,23 @@ exit:
   ret i1 %retval
 }
 
+; CHECK-LABEL: promote_arg_pass_to_call
+; CHECK-NOT: uxt
+define i16 @promote_arg_pass_to_call(i16 zeroext %arg1, i16 zeroext %arg2) {
+  %conv = add nuw i16 %arg1, 15
+  %mul = mul nuw nsw i16 %conv, 3
+  %cmp = icmp ult i16 %mul, %arg2
+  %trunc = trunc i16 %arg1 to i8
+  %res = call zeroext i16 @dummy4(i1 %cmp, i8 %trunc, i16 %arg1)
+  ret i16 %res
+}
+
 
 declare i32 @assert(...)
 declare i8 @dummy_i8(i8)
 declare i8 @dummy2(i8*, i8, i8)
 declare i16 @dummy3(i16)
+declare i16 @dummy4(i1, i8, i16)
 
 declare dso_local i32 @e(...) local_unnamed_addr #1
 declare dso_local zeroext i16 @f(...) local_unnamed_addr #1

Modified: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll?rev=347191&r1=347190&r2=347191&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll Mon Nov 19 03:34:40 2018
@@ -122,36 +122,6 @@ entry:
   ret i1 %or
 }
 
-; CHECK-COMMON-LABEL: icmp_switch_trunc:
-; CHECK-COMMON-NOT: uxt
-define i16 @icmp_switch_trunc(i16 zeroext %arg) {
-entry:
-  %conv = add nuw i16 %arg, 15
-  %mul = mul nuw nsw i16 %conv, 3
-  %trunc = trunc i16 %arg to i3
-  switch i3 %trunc, label %default [
-    i3 0, label %sw.bb
-    i3 1, label %sw.bb.i
-  ]
-
-sw.bb:
-  %cmp0 = icmp ult i16 %mul, 127
-  %select = select i1 %cmp0, i16 %mul, i16 127
-  br label %exit
-
-sw.bb.i:
-  %cmp1 = icmp ugt i16 %mul, 34
-  %select.i = select i1 %cmp1, i16 %mul, i16 34
-  br label %exit
-
-default:
-  br label %exit
-
-exit:
-  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
-  ret i16 %res
-}
-
 ; We currently only handle truncs as sinks, so a uxt will still be needed for
 ; the icmp ugt instruction.
 ; CHECK-COMMON-LABEL: urem_trunc_icmps
@@ -187,47 +157,6 @@ exit:
   ret void
 }
 
-; CHECK-COMMON-LABEL: phi_feeding_switch
-; CHECK-COMMON: ldrb
-; CHECK-COMMON: uxtb
-define void @phi_feeding_switch(i8* %memblock, i8* %store, i16 %arg) {
-entry:
-  %pre = load i8, i8* %memblock, align 1
-  %conv = trunc i16 %arg to i8
-  br label %header
-
-header:
-  %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ]
-  %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ]
-  %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ]
-  switch i8 %phi.0, label %default [
-    i8 43, label %for.inc.i
-    i8 45, label %for.inc.i.i
-  ]
-
-for.inc.i:
-  %xor = xor i8 %phi.1, 1
-  br label %latch
-
-for.inc.i.i:
-  %and = and i8 %phi.1, 3
-  br label %latch
-
-default:
-  %sub = sub i8 %phi.0, 1
-  %cmp2 = icmp ugt i8 %sub, 4
-  br i1 %cmp2, label %latch, label %exit
-
-latch:
-  %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ]
-  %count = add nuw i8 %phi.2, 1
-  store i8 %count, i8* %store, align 1
-  br label %header
-
-exit:
-  ret void
-}
-
 ; Check that %exp requires uxth in all cases, and will also be required to
 ; promote %1 for the call - unless we can generate a uadd16.
 ; CHECK-COMMON-LABEL: zext_load_sink_call:
@@ -254,40 +183,6 @@ exit:
   ret i32 %exitval
 }
 
-%class.ae = type { i8 }
-%class.x = type { i8 }
-%class.v = type { %class.q }
-%class.q = type { i16 }
-
-; CHECK-COMMON-LABEL: trunc_i16_i9_switch
-; CHECK-COMMON-NOT: uxt
-define i32 @trunc_i16_i9_switch(%class.ae* %this) {
-entry:
-  %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this)
-  %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call)
-  %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0
-  %1 = load i16, i16* %0, align 2
-  %2 = trunc i16 %1 to i9
-  %trunc = and i9 %2, -64
-  switch i9 %trunc, label %cleanup.fold.split [
-    i9 0, label %cleanup
-    i9 -256, label %if.then7
-  ]
-
-if.then7:
-  %3 = and i16 %1, 7
-  %tobool = icmp eq i16 %3, 0
-  %cond = select i1 %tobool, i32 2, i32 1
-  br label %cleanup
-
-cleanup.fold.split:
-  br label %cleanup
-
-cleanup:
-  %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ]
-  ret i32 %retval.0
-}
-
 ; CHECK-COMMON-LABEL: bitcast_i16
 ; CHECK-COMMON-NOT: uxt
 define i16 @bitcast_i16(i16 zeroext %arg0, i16 zeroext %arg1) {
@@ -332,8 +227,6 @@ entry:
   ret i8 %res
 }
 
-declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr
-declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr
 declare i32 @dummy(i32, i32)
 
 @d_uch = hidden local_unnamed_addr global [16 x i8] zeroinitializer, align 1
@@ -583,6 +476,8 @@ if.end:
   ret i8 %retval
 }
 
+; CHECK-COMMON-LABEL: bitcast_i1
+; CHECK-COMMON-NOT: uxt
 define i32 @bitcast_i1(i16 zeroext %a, i32 %b, i32 %c) {
 entry:
   %0 = bitcast i1 1 to i1
@@ -601,3 +496,40 @@ exit:
   %retval = phi i32 [ %select, %if.then ], [ 0, %entry ]
   ret i32 %retval
 }
+
+; CHECK-COMMON-LABEL: search_back_through_trunc
+; CHECK-COMMON-NOT: uxt
+; CHECK-COMMON: cmp
+; CHECK-COMMON: strb
+; CHECK-COMMON: strb
+define void @search_back_through_trunc(i8* %a, i8* %b, i8* %c, i8* %d, i16* %e) {
+entry:
+  %0 = load i8, i8* %a, align 1
+  %conv106 = zext i8 %0 to i16
+  %shl = shl nuw i16 %conv106, 8
+  %1 = load i8, i8* %b, align 1
+  %conv108 = zext i8 %1 to i16
+  %or109 = or i16 %shl, %conv108
+  %2 = load i8, i8* %c, align 1
+  %conv119 = zext i8 %2 to i16
+  %shl120 = shl nuw i16 %conv119, 8
+  %3 = load i8, i8* %d, align 1
+  %conv122 = zext i8 %3 to i16
+  %or123 = or i16 %shl120, %conv122
+  %cmp133 = icmp eq i16 %or109, %or123
+  br i1 %cmp133, label %if.end183, label %if.else136
+
+if.else136:
+  %4 = load i16, i16* %e, align 2
+  %extract.t854 = trunc i16 %4 to i8
+  %extract856 = lshr i16 %4, 8
+  %extract.t857 = trunc i16 %extract856 to i8
+  br label %if.end183
+
+if.end183:
+  %w.0.off0 = phi i8 [ %extract.t854, %if.else136 ], [ %1, %entry ]
+  %w.0.off8 = phi i8 [ %extract.t857, %if.else136 ], [ %2, %entry ]
+  store i8 %w.0.off8, i8* %c, align 1
+  store i8 %w.0.off0, i8* %d, align 1
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll?rev=347191&r1=347190&r2=347191&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll Mon Nov 19 03:34:40 2018
@@ -172,3 +172,15 @@ if.end:
 exit:
   ret i16 %unrelated
 }
+
+; CHECK-COMMON-LABEL: promote_arg_return
+; CHECK-COMMON-NOT: uxt
+; CHECK-COMMON: strb
+define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) {
+  %add = add nuw i16 %arg1, 15
+  %mul = mul nuw nsw i16 %add, 3
+  %cmp = icmp ult i16 %mul, %arg2
+  %conv = zext i1 %cmp to i8
+  store i8 %conv, i8* %res
+  ret i16 %arg1
+}

Added: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll?rev=347191&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll Mon Nov 19 03:34:40 2018
@@ -0,0 +1,168 @@
+; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s
+
+; CHECK-LABEL: truncate_source_phi_switch
+; CHECK: ldrb
+; CHECK: uxtb
+define void @truncate_source_phi_switch(i8* %memblock, i8* %store, i16 %arg) {
+entry:
+  %pre = load i8, i8* %memblock, align 1
+  %conv = trunc i16 %arg to i8
+  br label %header
+
+header:
+  %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ]
+  %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ]
+  %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ]
+  switch i8 %phi.0, label %default [
+    i8 43, label %for.inc.i
+    i8 45, label %for.inc.i.i
+  ]
+
+for.inc.i:
+  %xor = xor i8 %phi.1, 1
+  br label %latch
+
+for.inc.i.i:
+  %and = and i8 %phi.1, 3
+  br label %latch
+
+default:
+  %sub = sub i8 %phi.0, 1
+  %cmp2 = icmp ugt i8 %sub, 4
+  br i1 %cmp2, label %latch, label %exit
+
+latch:
+  %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ]
+  %count = add nuw i8 %phi.2, 1
+  store i8 %count, i8* %store, align 1
+  br label %header
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: icmp_switch_source:
+; CHECK-NOT: uxt
+define i16 @icmp_switch_source(i16 zeroext %arg) {
+entry:
+  %conv = add nuw i16 %arg, 15
+  %mul = mul nuw nsw i16 %conv, 3
+  switch i16 %arg, label %default [
+    i16 0, label %sw.bb
+    i16 1, label %sw.bb.i
+  ]
+
+sw.bb:
+  %cmp0 = icmp ult i16 %mul, 127
+  %select = select i1 %cmp0, i16 %mul, i16 127
+  br label %exit
+
+sw.bb.i:
+  %cmp1 = icmp ugt i16 %mul, 34
+  %select.i = select i1 %cmp1, i16 %mul, i16 34
+  br label %exit
+
+default:
+  br label %exit
+
+exit:
+  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
+  ret i16 %res
+}
+
+; CHECK-LABEL: icmp_switch_narrow_source:
+; CHECK-NOT: uxt
+define i16 @icmp_switch_narrow_source(i8 zeroext %arg) {
+entry:
+  %conv = zext i8 %arg to i16
+  %add = add nuw i16 %conv, 15
+  %mul = mul nuw nsw i16 %add, 3
+  switch i8 %arg, label %default [
+    i8 0, label %sw.bb
+    i8 1, label %sw.bb.i
+  ]
+
+sw.bb:
+  %cmp0 = icmp ult i16 %mul, 127
+  %select = select i1 %cmp0, i16 %mul, i16 127
+  br label %exit
+
+sw.bb.i:
+  %cmp1 = icmp ugt i16 %mul, 34
+  %select.i = select i1 %cmp1, i16 %mul, i16 34
+  br label %exit
+
+default:
+  br label %exit
+
+exit:
+  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
+  ret i16 %res
+}
+
+; CHECK-LABEL: icmp_switch_trunc:
+; CHECK-NOT: uxt
+define i16 @icmp_switch_trunc(i16 zeroext %arg) {
+entry:
+  %conv = add nuw i16 %arg, 15
+  %mul = mul nuw nsw i16 %conv, 3
+  %trunc = trunc i16 %arg to i3
+  switch i3 %trunc, label %default [
+    i3 0, label %sw.bb
+    i3 1, label %sw.bb.i
+  ]
+
+sw.bb:
+  %cmp0 = icmp ult i16 %mul, 127
+  %select = select i1 %cmp0, i16 %mul, i16 127
+  br label %exit
+
+sw.bb.i:
+  %cmp1 = icmp ugt i16 %mul, 34
+  %select.i = select i1 %cmp1, i16 %mul, i16 34
+  br label %exit
+
+default:
+  br label %exit
+
+exit:
+  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
+  ret i16 %res
+}
+
+%class.ae = type { i8 }
+%class.x = type { i8 }
+%class.v = type { %class.q }
+%class.q = type { i16 }
+declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr
+declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr
+
+; CHECK-LABEL: trunc_i16_i9_switch
+; CHECK-NOT: uxt
+define i32 @trunc_i16_i9_switch(%class.ae* %this) {
+entry:
+  %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this)
+  %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call)
+  %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0
+  %1 = load i16, i16* %0, align 2
+  %2 = trunc i16 %1 to i9
+  %trunc = and i9 %2, -64
+  switch i9 %trunc, label %cleanup.fold.split [
+    i9 0, label %cleanup
+    i9 -256, label %if.then7
+  ]
+
+if.then7:
+  %3 = and i16 %1, 7
+  %tobool = icmp eq i16 %3, 0
+  %cond = select i1 %tobool, i32 2, i32 1
+  br label %cleanup
+
+cleanup.fold.split:
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ]
+  ret i32 %retval.0
+}




More information about the llvm-commits mailing list