[llvm] FIXME: or when the offset was "add nuw" (PR #130451)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 8 17:30:04 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: AZero13 (AZero13)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/130451.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (+6-2)
- (added) llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll (+227)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 2462ec33e0c20..0e021294a7a76 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3020,10 +3020,14 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
{ValNumActiveBitsOffset, Start},
/*FMFSource=*/nullptr, "iv.final");
+ // Check if the offset was added with NUW flag
+ bool OffsetAddHasNUW = OffsetIsZero;
+ if (auto *OffsetAddInst = dyn_cast<BinaryOperator>(ValNumActiveBitsOffset))
+ OffsetAddHasNUW |= OffsetAddInst->hasNoUnsignedWrap();
+
auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
- /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
- // FIXME: or when the offset was `add nuw`
+ /*HasNUW=*/OffsetAddHasNUW, /*HasNSW=*/true));
// We know loop's backedge-taken count, but what's loop's trip count?
Value *LoopTripCount =
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll b/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll
new file mode 100644
index 0000000000000..8a2ae25bb8e17
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll
@@ -0,0 +1,227 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+; Test cases where the offset addition has the nuw flag and this property
+; should be preserved in the backedge taken count calculation.
+
+; Basic case: add nuw - check that HasNUW is preserved
+define i8 @preserve_nuw_flag(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT: [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT: [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT: [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT: [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK: end:
+; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT: ret i8 [[IV_RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nuw i8 %iv, %extraoffset
+ %val.shifted = lshr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+ br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+ %iv.res = phi i8 [ %iv, %loop ]
+ %nbits.res = phi i8 [ %nbits, %loop ]
+ %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+ %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+ %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+ call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+ ret i8 %iv.res
+}
+
+; Test with both nuw and nsw flags on the offset addition
+define i8 @preserve_nuw_nsw_flags(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_nsw_flags(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT: [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT: [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT: [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT: [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT: [[NBITS:%.*]] = add nuw nsw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK: end:
+; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT: ret i8 [[IV_RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nuw nsw i8 %iv, %extraoffset
+ %val.shifted = lshr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+ br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+ %iv.res = phi i8 [ %iv, %loop ]
+ %nbits.res = phi i8 [ %nbits, %loop ]
+ %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+ %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+ %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+ call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+ ret i8 %iv.res
+}
+
+; Test with left shift instead of logical right shift
+define i8 @preserve_nuw_flag_shl(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag_shl(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.cttz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT: [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT: [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT: [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT: [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = shl i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK: end:
+; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT: ret i8 [[IV_RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nuw i8 %iv, %extraoffset
+ %val.shifted = shl i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+ br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+ %iv.res = phi i8 [ %iv, %loop ]
+ %nbits.res = phi i8 [ %nbits, %loop ]
+ %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+ %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+ %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+ call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+ ret i8 %iv.res
+}
+
+; Test with arithmetic right shift
+define i8 @preserve_nuw_flag_ashr(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag_ashr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]]
+; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0
+; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
+; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]])
+; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]]
+; CHECK: end:
+; CHECK-NEXT: [[IV_RES:%.*]] = phi i8 [ [[IV]], [[LOOP]] ]
+; CHECK-NEXT: [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT: ret i8 [[IV_RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nuw i8 %iv, %extraoffset
+ %val.shifted = ashr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+ br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+ %iv.res = phi i8 [ %iv, %loop ]
+ %nbits.res = phi i8 [ %nbits, %loop ]
+ %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+ %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+ %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+ call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+ ret i8 %iv.res
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/130451
More information about the llvm-commits
mailing list