[llvm-branch-commits] [llvm] 4f568fb - [PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit

Nemanja Ivanovic via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Dec 28 18:41:34 PST 2020


Author: Nemanja Ivanovic
Date: 2020-12-28T20:36:16-06:00
New Revision: 4f568fbd21636c7c8d071f1901084cc0ae87f3ee

URL: https://github.com/llvm/llvm-project/commit/4f568fbd21636c7c8d071f1901084cc0ae87f3ee
DIFF: https://github.com/llvm/llvm-project/commit/4f568fbd21636c7c8d071f1901084cc0ae87f3ee.diff

LOG: [PowerPC] Do not emit HW loop when TLS var accessed in PHI of loop exit

If any PHI nodes in loop exit blocks have incoming values from the
loop that are accesses of TLS variables with local dynamic or general
dynamic TLS model, the address will be computed inside the loop. Since
this includes a call to __tls_get_addr, this will in turn cause the
CTR loops verifier to complain.
Disable CTR loops in such cases.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=48527

Added: 
    llvm/test/CodeGen/PowerPC/pr48527.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 011056c21b13..4de1f2aba416 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -335,6 +335,29 @@ PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
   return BaseT::getUserCost(U, Operands, CostKind);
 }
 
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const Value *MemAddr, const PPCTargetMachine &TM,
+                           SmallPtrSetImpl<const Value *> &Visited) {
+  // No need to traverse again if we already checked this operand.
+  if (!Visited.insert(MemAddr).second)
+    return false;
+  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+  if (!GV) {
+    // Recurse to check for constants that refer to TLS global variables.
+    if (const auto *CV = dyn_cast<Constant>(MemAddr))
+      for (const auto &CO : CV->operands())
+        if (memAddrUsesCTR(CO, TM, Visited))
+          return true;
+    return false;
+  }
+
+  if (!GV->isThreadLocal())
+    return false;
+  TLSModel::Model Model = TM.getTLSModel(GV);
+  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
 bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
                              SmallPtrSetImpl<const Value *> &Visited) {
   const PPCTargetMachine &TM = ST->getTargetMachine();
@@ -353,31 +376,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
     return false;
   };
 
-  // Determining the address of a TLS variable results in a function call in
-  // certain TLS models.
-  std::function<bool(const Value *)> memAddrUsesCTR =
-      [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
-    // No need to traverse again if we already checked this operand.
-    if (!Visited.insert(MemAddr).second)
-      return false;
-    const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-    if (!GV) {
-      // Recurse to check for constants that refer to TLS global variables.
-      if (const auto *CV = dyn_cast<Constant>(MemAddr))
-        for (const auto &CO : CV->operands())
-          if (memAddrUsesCTR(CO))
-            return true;
-
-      return false;
-    }
-
-    if (!GV->isThreadLocal())
-      return false;
-    TLSModel::Model Model = TM.getTLSModel(GV);
-    return Model == TLSModel::GeneralDynamic ||
-      Model == TLSModel::LocalDynamic;
-  };
-
   auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
     if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
       return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
@@ -676,7 +674,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
     }
 
     for (Value *Operand : J->operands())
-      if (memAddrUsesCTR(Operand))
+      if (memAddrUsesCTR(Operand, TM, Visited))
         return true;
   }
 
@@ -736,6 +734,24 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
     }
   }
 
+  // If an exit block has a PHI that accesses a TLS variable as one of the
+  // incoming values from the loop, we cannot produce a CTR loop because the
+  // address for that value will be computed in the loop.
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  for (auto &BB : ExitBlocks) {
+    for (auto &PHI : BB->phis()) {
+      for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
+           Idx++) {
+        const BasicBlock *IncomingBB = PHI.getIncomingBlock(Idx);
+        const Value *IncomingValue = PHI.getIncomingValue(Idx);
+        if (L->contains(IncomingBB) &&
+            memAddrUsesCTR(IncomingValue, TM, Visited))
+          return false;
+      }
+    }
+  }
+
   LLVMContext &C = L->getHeader()->getContext();
   HWLoopInfo.CountType = TM.isPPC64() ?
     Type::getInt64Ty(C) : Type::getInt32Ty(C);

diff  --git a/llvm/test/CodeGen/PowerPC/pr48527.ll b/llvm/test/CodeGen/PowerPC/pr48527.ll
new file mode 100644
index 000000000000..eaff15ce071e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr48527.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+%struct.e.0.12.28.44.104.108.112.188 = type { i32 }
+%struct.t.1.13.29.45.105.109.113.189 = type { i64, i64 }
+
+@g = external local_unnamed_addr global %struct.e.0.12.28.44.104.108.112.188, align 4
+@aj = external thread_local local_unnamed_addr global %struct.t.1.13.29.45.105.109.113.189, align 8
+
+define void @_ZNK1q1rEv() local_unnamed_addr #0 align 2 {
+; CHECK-LABEL: _ZNK1q1rEv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    lwz 30, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:    ld 29, .LC0@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, aj@got@tlsgd@ha
+; CHECK-NEXT:    addi 3, 3, aj@got@tlsgd@l
+; CHECK-NEXT:    bl __tls_get_addr(aj@tlsgd)
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 4, 3, 8
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB0_1: # %monotonic.i
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lwz 5, 0(29)
+; CHECK-NEXT:    andi. 5, 5, 255
+; CHECK-NEXT:    bne 0, .LBB0_4
+; CHECK-NEXT:  # %bb.2: # %for.cond.i
+; CHECK-NEXT:    #
+; CHECK-NEXT:    addi 30, 30, -1
+; CHECK-NEXT:    cmplwi 30, 0
+; CHECK-NEXT:    bne 0, .LBB0_1
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mr 4, 3
+; CHECK-NEXT:  .LBB0_4: # %if.end
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    std 3, 0(4)
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* undef, align 4
+  br label %monotonic.i
+
+for.cond.i:                                       ; preds = %monotonic.i
+  %exitcond.not = icmp eq i32 %inc.i, %0
+  br i1 %exitcond.not, label %if.end, label %monotonic.i
+
+monotonic.i:                                      ; preds = %for.cond.i, %entry
+  %i.018.i = phi i32 [ %inc.i, %for.cond.i ], [ 0, %entry ]
+  %1 = load atomic i32, i32* getelementptr inbounds (%struct.e.0.12.28.44.104.108.112.188, %struct.e.0.12.28.44.104.108.112.188* @g, i64 0, i32 0) monotonic, align 4
+  %conv.i = trunc i32 %1 to i8
+  %tobool.not.i = icmp eq i8 %conv.i, 0
+  %inc.i = add nuw nsw i32 %i.018.i, 1
+  br i1 %tobool.not.i, label %for.cond.i, label %if.end
+
+if.end:                                           ; preds = %monotonic.i, %for.cond.i
+  %.sink = phi i64* [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 1), %monotonic.i ], [ getelementptr inbounds (%struct.t.1.13.29.45.105.109.113.189, %struct.t.1.13.29.45.105.109.113.189* @aj, i64 0, i32 0), %for.cond.i ]
+  store i64 1, i64* %.sink, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }


        


More information about the llvm-branch-commits mailing list