[llvm] FIXME: or when the offset was "add nuw" (PR #130451)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 8 17:30:04 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: AZero13 (AZero13)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/130451.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (+6-2) 
- (added) llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll (+227) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 2462ec33e0c20..0e021294a7a76 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3020,10 +3020,14 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
                                            {ValNumActiveBitsOffset, Start},
                                            /*FMFSource=*/nullptr, "iv.final");
 
+  // Check if the offset was added with NUW flag
+  bool OffsetAddHasNUW = OffsetIsZero;
+  if (auto *OffsetAddInst = dyn_cast<BinaryOperator>(ValNumActiveBitsOffset))
+    OffsetAddHasNUW |= OffsetAddInst->hasNoUnsignedWrap();
+
   auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
       IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
-      /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
-  // FIXME: or when the offset was `add nuw`
+      /*HasNUW=*/OffsetAddHasNUW, /*HasNSW=*/true));
 
   // We know loop's backedge-taken count, but what's loop's trip count?
   Value *LoopTripCount =
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll b/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll
new file mode 100644
index 0000000000000..8a2ae25bb8e17
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll
@@ -0,0 +1,227 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+; Test cases where the offset addition has the nuw flag and this property
+; should be preserved in the backedge taken count calculation.
+
+; Basic case: add nuw - check that HasNUW is preserved
+define i8 @preserve_nuw_flag(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT:    [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT:    [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT:    [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw i8 %iv, %extraoffset
+  %val.shifted = lshr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}
+
+; Test with both nuw and nsw flags on the offset addition
+define i8 @preserve_nuw_nsw_flags(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_nsw_flags(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT:    [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT:    [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT:    [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw nsw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw nsw i8 %iv, %extraoffset
+  %val.shifted = lshr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}
+
+; Test with left shift instead of logical right shift
+define i8 @preserve_nuw_flag_shl(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.cttz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT:    [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT:    [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT:    [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = shl i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw i8 %iv, %extraoffset
+  %val.shifted = shl i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}
+
+; Test with arithmetic right shift
+define i8 @preserve_nuw_flag_ashr(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag_ashr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0
+; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw i8 %iv, %extraoffset
+  %val.shifted = ashr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/130451


More information about the llvm-commits mailing list