[llvm] d607d6f - [LegalizeTypes] Keep non-negative info in SUB(CTLZ) (#186338)

Tue Mar 24 01:17:22 PDT 2026

Author: Alexey Merzlyakov
Date: 2026-03-24T09:17:17+01:00
New Revision: d607d6f78f803d51a961d991811f0f2f439f695a

URL: https://github.com/llvm/llvm-project/commit/d607d6f78f803d51a961d991811f0f2f439f695a
DIFF: https://github.com/llvm/llvm-project/commit/d607d6f78f803d51a961d991811f0f2f439f695a.diff

LOG: [LegalizeTypes] Keep non-negative info in SUB(CTLZ) (#186338)

When CTLZ is legalized by promoting it to a wider type and compensating
for the extra bit width with a SUB, the result for the zero-defined case
can have an extra bit that exceeds the maximum value of the original type
width. Unlike CTLS, which covers all bits within its type width, CTLZ
ends up producing undefined KnownBits. As a result, the KnownBits after
the subtraction could represent a negative value, which prevents
subsequent optimizations.

This patch ensures that the non-negative information is preserved during
the SUB(CTLZ) expansion, so that the KnownBits after the SUB are limited
to a stricter range within the type width, thus allowing further combine
optimizations that follow the SUB. In particular, this enables the DAG
Combiner to remove unnecessary AND instructions for the CTLZ output type
conversion.

Fixes #136516

---------

Co-authored-by: Simon Pilgrim <git at redking.me.uk>

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/test/CodeGen/AArch64/cls.ll
    llvm/test/CodeGen/AArch64/pr61549.ll
    llvm/test/CodeGen/VE/Scalar/ctlz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 4a27f804d6720..3d5b838c2ff8f 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -743,15 +743,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
     // Zero extend to the promoted type and do the count there.
     SDValue Op = ZExtPromotedInteger(N->getOperand(0));
 
+    // At this stage the SUB is guaranteed not to wrap unsigned; mark it
+    // NUW so that later KnownBits-based optimizations can rely on that.
+    SDNodeFlags SubFlags;
+    SubFlags.setNoUnsignedWrap(true);
     if (!N->isVPOpcode())
       return DAG.getNode(ISD::SUB, dl, NVT,
                          DAG.getNode(N->getOpcode(), dl, NVT, Op),
-                         ExtractLeadingBits);
+                         ExtractLeadingBits, SubFlags);
     SDValue Mask = N->getOperand(1);
     SDValue EVL = N->getOperand(2);
     return DAG.getNode(ISD::VP_SUB, dl, NVT,
                        DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
-                       ExtractLeadingBits, Mask, EVL);
+                       ExtractLeadingBits, Mask, EVL, SubFlags);
   }
   if (CtlzOpcode == ISD::CTLZ_ZERO_UNDEF ||
       CtlzOpcode == ISD::VP_CTLZ_ZERO_UNDEF) {

diff  --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
index 0af9adcb3695a..cb0c0aa5ec2ed 100644
--- a/llvm/test/CodeGen/AArch64/cls.ll
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -173,3 +173,47 @@ define <4 x i32> @neon_cls_v4i32_knownbits(<4 x i32> %a) nounwind {
 }
 
 declare <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32>) nounwind readnone
+
+; Test ensures that the compiler generates no extra instructions
+; for __builtin_clzg output type conversion
+define i32 @foo8(i8 %0) {
+; CHECK-SD-LABEL: foo8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    and w8, w0, #0xff
+; CHECK-SD-NEXT:    clz w8, w8
+; CHECK-SD-NEXT:    sub w0, w8, #24
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: foo8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    and w8, w0, #0xff
+; CHECK-GI-NEXT:    clz w8, w8
+; CHECK-GI-NEXT:    sub w8, w8, #24
+; CHECK-GI-NEXT:    and w0, w8, #0xff
+; CHECK-GI-NEXT:    ret
+  %2 = tail call i8 @llvm.ctlz.i8(i8 %0, i1 false)
+  %3 = zext nneg i8 %2 to i32
+  ret i32 %3
+}
+
+; Test ensures that the compiler generates no extra instructions
+; for __builtin_clzg output type conversion
+define i32 @foo16(i16 %0) {
+; CHECK-SD-LABEL: foo16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    and w8, w0, #0xffff
+; CHECK-SD-NEXT:    clz w8, w8
+; CHECK-SD-NEXT:    sub w0, w8, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: foo16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    and w8, w0, #0xffff
+; CHECK-GI-NEXT:    clz w8, w8
+; CHECK-GI-NEXT:    sub w8, w8, #16
+; CHECK-GI-NEXT:    and w0, w8, #0xffff
+; CHECK-GI-NEXT:    ret
+  %2 = tail call i16 @llvm.ctlz.i16(i16 %0, i1 false)
+  %3 = zext nneg i16 %2 to i32
+  ret i32 %3
+}

diff  --git a/llvm/test/CodeGen/AArch64/pr61549.ll b/llvm/test/CodeGen/AArch64/pr61549.ll
index e66ee7d219cc5..a45703c2fc3b8 100644
--- a/llvm/test/CodeGen/AArch64/pr61549.ll
+++ b/llvm/test/CodeGen/AArch64/pr61549.ll
@@ -10,8 +10,8 @@ define i35 @f(i35 %0) {
 ; CHECK-NEXT:    sdiv x10, x9, x8
 ; CHECK-NEXT:    msub x8, x10, x8, x9
 ; CHECK-NEXT:    clz x8, x8
-; CHECK-NEXT:    sub x8, x8, #29
-; CHECK-NEXT:    ubfx x0, x8, #5, #30
+; CHECK-NEXT:    sub w8, w8, #29
+; CHECK-NEXT:    ubfx x0, x8, #5, #2
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: f:

diff  --git a/llvm/test/CodeGen/VE/Scalar/ctlz.ll b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
index c2af9753f8bb6..0ff7cf16f6335 100644
--- a/llvm/test/CodeGen/VE/Scalar/ctlz.ll
+++ b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
@@ -212,7 +212,7 @@ define signext i16 @func16sx(i16 signext %p) {
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
 ; CHECK-NEXT:    adds.w.sx %s0, -16, %s0
-; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 false)
   ret i16 %r
@@ -224,7 +224,7 @@ define zeroext i16 @func16zx(i16 zeroext %p) {
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
 ; CHECK-NEXT:    adds.w.sx %s0, -16, %s0
-; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 false)
   ret i16 %r
@@ -237,7 +237,7 @@ define signext i8 @func8sx(i8 signext %p) {
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
 ; CHECK-NEXT:    adds.w.sx %s0, -24, %s0
-; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 false)
   ret i8 %r
@@ -249,7 +249,7 @@ define zeroext i8 @func8zx(i8 zeroext %p) {
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
 ; CHECK-NEXT:    adds.w.sx %s0, -24, %s0
-; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 false)
   ret i8 %r