[PATCH] D146429: [LSR] Fix wrapping bug in lsr-term-fold logic
Philip Reames via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 20 13:47:37 PDT 2023
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG091422adc1d7: [LSR] Fix wrapping bug in lsr-term-fold logic (authored by reames).
Changed prior to commit:
https://reviews.llvm.org/D146429?vs=506218&id=506704#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D146429/new/
https://reviews.llvm.org/D146429
Files:
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -192,20 +192,18 @@
; In this case, the integer IV has a larger bitwidth than the pointer IV.
; This means that the smaller IV may wrap around multiple times before
; the original loop exit is taken.
-; FIXME: miscompile
define void @iv_size(ptr %a, i128 %N) {
; CHECK-LABEL: @iv_size(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[N:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i128 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N:%.*]], [[ENTRY]] ]
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i128 [[LSR_IV]], -1
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
-; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i128 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6762,7 +6762,25 @@
continue;
}
- // FIXME: This does not properly account for overflow.
+ // Check that we can compute the value of AddRec on the exiting iteration
+ // without soundness problems. There are two cases to be worried about:
+ // 1) BECount could be 255 with type i8. Simply adding one would be
+ // incorrect. We may need one extra bit to represent the unsigned
+ // trip count.
+ // 2) The multiplication of stride by TC may wrap around. This is subtle
+ // because computing the result accounting for wrap is insufficient.
+ // In order to use the result in an exit test, we must also know that
+ // AddRec doesn't take the same value on any previous iteration.
+ // The simplest case to consider is a candidate IV which is narrower
+ // than the trip count (and thus original IV), but this can also
+ // happen due to non-unit strides on the candidate IVs.
+ ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE));
+ ConstantRange BECountCR = SE.getUnsignedRange(BECount);
+ unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + 1 + StepCR.getMinSignedBits();
+ unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType());
+ if (NoOverflowBitWidth > ARBitWidth)
+ continue;
+
const SCEV *TermValueSLocal = SE.getAddExpr(
AddRec->getOperand(0),
SE.getTruncateOrZeroExtend(
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D146429.506704.patch
Type: text/x-patch
Size: 3450 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230320/fef5b9a6/attachment.bin>
More information about the llvm-commits mailing list