[PATCH] D119200: Match up type sizes for possible extensions based on the actual bit size rather than the rounded-up byte size.

Mon Feb 7 16:07:13 PST 2022

clin1 created this revision.
clin1 added reviewers: kazu, fhahn.
Herald added subscribers: javed.absar, hiraditya.
clin1 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

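Avoids an assertion failure in SCEV when the type being extended to is actually narrower than the source type (for example, an i1 backedge-taken count and an i8 induction variable both round up to one byte, so the byte-size comparison treated them as equal).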


https://reviews.llvm.org/D119200

Files:
  llvm/lib/Analysis/LoopAccessAnalysis.cpp
  llvm/test/Transforms/LoopLoadElim/loop-i1-count.ll


Index: llvm/test/Transforms/LoopLoadElim/loop-i1-count.ll
===================================================================

--- /dev/null
+++ llvm/test/Transforms/LoopLoadElim/loop-i1-count.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=loop-load-elim %s | FileCheck %s
+
+; The backedge taken count of this loop is an i1 type, and the IV is i8.
+; The math in LoopAccessAnalysis was rounding the type sizes to bytes and
+; believing them equal, causing a size mismatch.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external dso_local local_unnamed_addr global [1 x i32], align 4
+
+define dso_local void @test() local_unnamed_addr {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND3:%.*]]
+; CHECK:       for.cond3:
+; CHECK-NEXT:    [[H_0:%.*]] = phi i8 [ undef, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[COND_END_FOR_COND_CLEANUP_LOOPEXIT_CRIT_EDGE:%.*]] ]
+; CHECK-NEXT:    [[IDXPROM11:%.*]] = sext i8 [[H_0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* @a, i64 0, i64 [[IDXPROM11]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       cond.end.for.cond.cleanup.loopexit_crit_edge:
+; CHECK-NEXT:    [[ADD]] = add i8 [[H_0]], undef
+; CHECK-NEXT:    br label [[FOR_COND3]]
+; CHECK:       for.body:
+; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX27]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = or i1 true, undef
+; CHECK-NEXT:    br i1 [[TMP0]], label [[COND_END_FOR_COND_CLEANUP_LOOPEXIT_CRIT_EDGE]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+;
+entry:
+  br label %for.cond3
+
+for.cond3:                                        ; preds = %cond.end.for.cond.cleanup.loopexit_crit_edge, %entry
+  %h.0 = phi i8 [ undef, %entry ], [ %add, %cond.end.for.cond.cleanup.loopexit_crit_edge ]
+  %idxprom11 = sext i8 %h.0 to i64
+  %arrayidx27 = getelementptr inbounds [1 x i32], [1 x i32]* @a, i64 0, i64 %idxprom11
+  br label %for.body
+
+cond.end.for.cond.cleanup.loopexit_crit_edge:     ; preds = %for.body
+  %add = add i8 %h.0, undef
+  br label %for.cond3
+
+for.body:                                         ; preds = %for.body, %for.cond3
+  store i32 0, i32* %arrayidx27, align 4
+  %0 = or i1 true, undef
+  br i1 %0, label %cond.end.for.cond.cleanup.loopexit_crit_edge, label %for.body, !llvm.loop !1
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.mustprogress"}
Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1445,13 +1445,13 @@
 
   const SCEV *CastedDist = &Dist;
   const SCEV *CastedProduct = Product;
-  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
-  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+  uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
+  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
 
   // The dependence distance can be positive/negative, so we sign extend Dist;
   // The multiplication of the absolute stride in bytes and the
   // backedgeTakenCount is non-negative, so we zero extend Product.
-  if (DistTypeSize > ProductTypeSize)
+  if (DistTypeSizeBits > ProductTypeSizeBits)
     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
   else
     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
@@ -2260,12 +2260,12 @@
   // The Stride can be positive/negative, so we sign extend Stride;
   // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
   const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
-  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
+  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
+  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
   const SCEV *CastedStride = StrideExpr;
   const SCEV *CastedBECount = BETakenCount;
   ScalarEvolution *SE = PSE->getSE();
-  if (BETypeSize >= StrideTypeSize)
+  if (BETypeSizeBits >= StrideTypeSizeBits)
     CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
   else
     CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D119200.406642.patch
Type: text/x-patch
Size: 4499 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220208/29d746ca/attachment.bin>