[llvm] FIXME: or when the offset was "add nuw" (PR #130451)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 8 17:29:35 PST 2025


https://github.com/AZero13 created https://github.com/llvm/llvm-project/pull/130451

None

>From 3799bfbeb0ea5b9f958b537144d1de836ec450b5 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 8 Mar 2025 20:18:32 -0500
Subject: [PATCH 1/2] Pre-commit tests [NFC]

---
 .../X86/preserve-nuw-flag-shift-until-zero.ll | 227 ++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll

diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll b/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll
new file mode 100644
index 0000000000000..8a2ae25bb8e17
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-nuw-flag-shift-until-zero.ll
@@ -0,0 +1,227 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+; Test cases where the offset addition has the nuw flag and this property
+; should be preserved in the backedge taken count calculation.
+
+; Basic case: add nuw - check that HasNUW is preserved
+define i8 @preserve_nuw_flag(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT:    [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT:    [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT:    [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw i8 %iv, %extraoffset
+  %val.shifted = lshr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}
+
+; Test with both nuw and nsw flags on the offset addition
+define i8 @preserve_nuw_nsw_flags(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_nsw_flags(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT:    [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT:    [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT:    [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw nsw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw nsw i8 %iv, %extraoffset
+  %val.shifted = lshr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}
+
+; Test with left shift instead of logical right shift
+define i8 @preserve_nuw_flag_shl(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.cttz.i8(i8 [[VAL:%.*]], i1 false)
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_NUMACTIVEBITS_OFFSET:%.*]] = add nsw i8 [[VAL_NUMACTIVEBITS]], [[TMP0]]
+; CHECK-NEXT:    [[IV_FINAL:%.*]] = call i8 @llvm.smax.i8(i8 [[VAL_NUMACTIVEBITS_OFFSET]], i8 [[START:%.*]])
+; CHECK-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub nsw i8 [[IV_FINAL]], [[START]]
+; CHECK-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw nsw i8 [[LOOP_BACKEDGETAKENCOUNT]], 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[LOOP_IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1
+; CHECK-NEXT:    [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]]
+; CHECK-NEXT:    [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = shl i8 [[VAL]], [[NBITS]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV_FINAL]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw i8 %iv, %extraoffset
+  %val.shifted = shl i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}
+
+; Negative test: with an arithmetic right shift the loop must be left unchanged
+define i8 @preserve_nuw_flag_ashr(i8 %val, i8 %start, i8 %extraoffset) {
+; CHECK-LABEL: @preserve_nuw_flag_ashr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET:%.*]]
+; CHECK-NEXT:    [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0
+; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 1
+; CHECK-NEXT:    call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]])
+; CHECK-NEXT:    br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
+; CHECK-NEXT:    [[IV_RES:%.*]] = phi i8 [ [[IV]], [[LOOP]] ]
+; CHECK-NEXT:    [[NBITS_RES:%.*]] = phi i8 [ [[NBITS]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]])
+; CHECK-NEXT:    ret i8 [[IV_RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nuw i8 %iv, %extraoffset
+  %val.shifted = ashr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
+
+  br i1 %val.shifted.iszero, label %end, label %loop
+
+end:
+  %iv.res = phi i8 [ %iv, %loop ]
+  %nbits.res = phi i8 [ %nbits, %loop ]
+  %val.shifted.res = phi i8 [ %val.shifted, %loop ]
+  %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
+  %iv.next.res = phi i8 [ %iv.next, %loop ]
+
+  call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
+
+  ret i8 %iv.res
+}

>From a950fba8a6e47083bee01c5fe5286c338514dc0e Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 8 Mar 2025 20:20:34 -0500
Subject: [PATCH 2/2] FIXME: or when the offset was `add nuw`

Add add-nuw detection
---
 llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 2462ec33e0c20..0e021294a7a76 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3020,10 +3020,14 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
                                            {ValNumActiveBitsOffset, Start},
                                            /*FMFSource=*/nullptr, "iv.final");
 
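+  // Check if the offset was added with the NUW flag. NOTE(review): ValNumActiveBitsOffset looks like the add emitted by this transform, not the loop's own offset add -- confirm.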
+  bool OffsetAddHasNUW = OffsetIsZero;
+  if (auto *OffsetAddInst = dyn_cast<BinaryOperator>(ValNumActiveBitsOffset))
+    OffsetAddHasNUW |= OffsetAddInst->hasNoUnsignedWrap();
+
   auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
       IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
-      /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
-  // FIXME: or when the offset was `add nuw`
+      /*HasNUW=*/OffsetAddHasNUW, /*HasNSW=*/true));
 
   // We know loop's backedge-taken count, but what's loop's trip count?
   Value *LoopTripCount =



More information about the llvm-commits mailing list