[llvm] [GISel] computeKnownBits - add CTLS handling (PR #178063)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 9 01:04:15 PST 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/178063
>From 4e451ed4380e97ad7f0b9195a3cb52230c482461 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Mon, 26 Jan 2026 20:46:14 +0000
Subject: [PATCH 1/9] add ctls handling in gisel
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 34692f0b4c4ee..00430d671d74a 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -681,6 +681,14 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom(LowBits);
break;
}
+ case TargetOpcode::G_CTLS: {
+ unsigned MinRedundantSignBits = computeNumSignBits(MI.getOperand(0).getReg(), Depth + 1) - 1;
+
+ ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
+ APInt(BitWidth, BitWidth));
+ Known = Range.toKnownBits();
+ break;
+ }
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
Register InVec = Extract.getVectorReg();
>From d6372a0161c2f5b278b16b948b5cbe509dd1a203 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Mon, 26 Jan 2026 20:46:38 +0000
Subject: [PATCH 2/9] format
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 00430d671d74a..014e0efcaed2d 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -682,7 +682,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CTLS: {
- unsigned MinRedundantSignBits = computeNumSignBits(MI.getOperand(0).getReg(), Depth + 1) - 1;
+ unsigned MinRedundantSignBits =
+ computeNumSignBits(MI.getOperand(0).getReg(), Depth + 1) - 1;
ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
APInt(BitWidth, BitWidth));
>From ce6ea5b0e23b93a40894e33a47ae96819e36a223 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Mon, 26 Jan 2026 20:47:03 +0000
Subject: [PATCH 3/9] add tests
---
llvm/test/CodeGen/AArch64/arm64-clrsb.ll | 137 +++++++++++++++++++-
llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll | 85 ++++++++++++
llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll | 23 ++++
3 files changed, 242 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
index 4597c6178e2ba..2f4b0ac3653a0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -91,6 +91,137 @@ entry:
ret i64 %0
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
+define i8 @cls_i8(i8 %x) {
+; CHECK-LABEL: cls_i8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cls w8, w8
+; CHECK-NEXT: sub w0, w8, #24
+; CHECK-NEXT: ret
+
+ %a = ashr i8 %x, 7
+ %b = xor i8 %x, %a
+ %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false)
+ %d = sub i8 %c, 1
+ ret i8 %d
+}
+
+; The result is in the range [1-31], so we don't need an 'and' after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %a = ashr i32 %x, 31
+ %b = xor i32 %x, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = and i32 %d, 31
+ ret i32 %e
+}
+
+; There are at least 16 redundant sign bits so we don't need an 'orr' after the cls.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %sext = sext i16 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 16
+ ret i32 %e
+}
+
+; Check that the range max in the G_CTLS known-bits computation
+; is not set to 32
+define i64 @cls_i64_not_32(i64 %x) {
+; CHECK-SD-LABEL: cls_i64_not_32:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: asr x8, x0, #16
+; CHECK-SD-NEXT: cls x8, x8
+; CHECK-SD-NEXT: orr x0, x8, #0x10
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i64_not_32:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: asr x8, x0, #63
+; CHECK-GI-NEXT: eor x8, x8, x0, asr #16
+; CHECK-GI-NEXT: lsl x8, x8, #1
+; CHECK-GI-NEXT: orr x8, x8, #0x1
+; CHECK-GI-NEXT: clz x8, x8
+; CHECK-GI-NEXT: orr x0, x8, #0x10
+; CHECK-GI-NEXT: ret
+ %val = ashr i64 %x, 16
+ %a = ashr i64 %val, 63
+ %b = xor i64 %val, %a
+ %c = shl i64 %b, 1
+ %d = or i64 %c, 1
+ %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true)
+ %f = or i64 %e, 16
+ ret i64 %f
+}
+
+; There are at least 24 redundant sign bits so we don't need an 'orr' after the cls.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %sext = sext i8 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 24
+ ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit. We can't
+; remove the 'orr'.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sbfx w8, w0, #0, #31
+; CHECK-NEXT: cls w8, w8
+; CHECK-NEXT: orr w0, w8, #0x1
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 1
+ %ashr = ashr i32 %shl, 1
+ %a = ashr i32 %ashr, 31
+ %b = xor i32 %ashr, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 1
+ ret i32 %e
+ }
+
+; Negative test. Check that the number of sign bits is not
+; overestimated. If it is, the orr disappears.
+define i32 @cls_i32_knownbits_no_overestimate(i32 signext %x) {
+; CHECK-SD-LABEL: cls_i32_knownbits_no_overestimate:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: asr w8, w0, #15
+; CHECK-SD-NEXT: cls w8, w8
+; CHECK-SD-NEXT: orr w0, w8, #0x10
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i32_knownbits_no_overestimate:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: asr w8, w0, #31
+; CHECK-GI-NEXT: eor w8, w8, w0, asr #15
+; CHECK-GI-NEXT: clz w8, w8
+; CHECK-GI-NEXT: sub w8, w8, #1
+; CHECK-GI-NEXT: orr w0, w8, #0x10
+; CHECK-GI-NEXT: ret
+ %ashr = ashr i32 %x, 15
+ %a = ashr i32 %ashr, 31
+ %b = xor i32 %ashr, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 16
+ ret i32 %e
+ }
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll
index 3f403fd8cb9e5..5708077fcefb1 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll
@@ -139,3 +139,88 @@ define i64 @cls_i64_2(i64 %x) {
%e = call i64 @llvm.ctlz.i64(i64 %d, i1 true)
ret i64 %e
}
+
+; The result is in the range [1-31], so we don't need an andi after the cls.
+define i32 @cls_i32_knownbits(i32 %x) {
+; CHECK-LABEL: cls_i32_knownbits:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ret
+ %a = ashr i32 %x, 31
+ %b = xor i32 %x, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = and i32 %d, 31
+ ret i32 %e
+}
+
+; There are at least 16 redundant sign bits so we don't need an ori after the clsw.
+define i32 @cls_i32_knownbits_2(i16 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ret
+ %sext = sext i16 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 16
+ ret i32 %e
+}
+
+; There are at least 24 redundant sign bits so we don't need an ori after the clsw.
+define i32 @cls_i32_knownbits_3(i8 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ret
+ %sext = sext i8 %x to i32
+ %a = ashr i32 %sext, 31
+ %b = xor i32 %sext, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 24
+ ret i32 %e
+}
+
+; Negative test. We only know there is at least 1 redundant sign bit. We can't
+; remove the ori.
+define i32 @cls_i32_knownbits_4(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: srai a0, a0, 1
+; CHECK-NEXT: cls a0, a0
+; CHECK-NEXT: ori a0, a0, 1
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 1
+ %ashr = ashr i32 %shl, 1
+ %a = ashr i32 %ashr, 31
+ %b = xor i32 %ashr, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 1
+ ret i32 %e
+ }
+
+; Negative test. Check that the number of sign bits is not
+; overestimated. If it is, the ori disappears.
+define i32 @cls_i32_knownbits_no_overestimate(i32 signext %x) {
+; CHECK-LABEL: cls_i32_knownbits_no_overestimate:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srai a1, a0, 15
+; CHECK-NEXT: srai a0, a0, 31
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: clz a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ori a0, a0, 16
+; CHECK-NEXT: ret
+ %ashr = ashr i32 %x, 15
+ %a = ashr i32 %ashr, 31
+ %b = xor i32 %ashr, %a
+ %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
+ %d = sub i32 %c, 1
+ %e = or i32 %d, 16
+ ret i32 %e
+ }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll
index 5faf1079a7804..fa32f3ed39c92 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll
@@ -112,3 +112,26 @@ define i64 @cls_i64_2(i64 %x) {
%e = call i64 @llvm.ctlz.i64(i64 %d, i1 true)
ret i64 %e
}
+
+; Check that the range max in the G_CTLS known-bits computation
+; is not set to 32. If it is, then the ori disappears.
+define i64 @cls_i64_not_32(i64 %x) {
+; CHECK-LABEL: cls_i64_not_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srai a1, a0, 16
+; CHECK-NEXT: srai a0, a0, 63
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: ori a0, a0, 1
+; CHECK-NEXT: clz a0, a0
+; CHECK-NEXT: ori a0, a0, 16
+; CHECK-NEXT: ret
+ %val = ashr i64 %x, 16
+ %a = ashr i64 %val, 63
+ %b = xor i64 %val, %a
+ %c = shl i64 %b, 1
+ %d = or i64 %c, 1
+ %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true)
+ %f = or i64 %e, 16
+ ret i64 %f
+}
>From 4ae324f8bb81da35e62ebddf2e5ff24a108a356a Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 28 Jan 2026 18:22:37 +0000
Subject: [PATCH 4/9] more accurate logic
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 21 +-
.../AArch64/GlobalISel/knownbits-ctls.mir | 192 ++++++++++++++++++
llvm/test/CodeGen/AArch64/arm64-clrsb.ll | 28 ++-
llvm/test/CodeGen/AArch64/cls.ll | 74 +++++--
4 files changed, 283 insertions(+), 32 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 014e0efcaed2d..919ddc10790f8 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
@@ -21,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -682,11 +685,23 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CTLS: {
- unsigned MinRedundantSignBits =
- computeNumSignBits(MI.getOperand(0).getReg(), Depth + 1) - 1;
+ KnownBits SrcOpKnown;
+ auto Reg = MI.getOperand(1).getReg();
+
+ computeKnownBitsImpl(Reg, SrcOpKnown, DemandedElts, Depth + 1);
+ unsigned MinRedundantSignBits = SrcOpKnown.countMinSignBits() - 1;
+
+ if (SrcOpKnown.isConstant()) {
+ Known = KnownBits::makeConstant(APInt(BitWidth, MinRedundantSignBits));
+ break;
+ }
+
+ unsigned MaxUpperRedundantSignBits =
+ MRI.getType(Reg).getScalarSizeInBits() - 1;
ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
- APInt(BitWidth, BitWidth));
+ APInt(BitWidth, MaxUpperRedundantSignBits));
+
Known = Range.toKnownBits();
break;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir
new file mode 100644
index 0000000000000..2c13de467ce02
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir
@@ -0,0 +1,192 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -passes="print<gisel-value-tracking>" -filetype=null %s 2>&1 | FileCheck %s
+
+---
+name: CTLSConst8AmbiguousMaxSignbitNum
+body: |
+  bb.1:
+    ; NOTE: Currently we do not estimate max sign bits in KnownBits,
+    ; so while we could get more accurate here, this is currently
+    ; not possible.
+    ; CHECK-LABEL: name: @CTLSConst8AmbiguousMaxSignbitNum
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00110000 SignBits:2
+ ; CHECK-NEXT: %2:_ KnownBits:00001000 SignBits:4
+ ; CHECK-NEXT: %3:_ KnownBits:00??0000 SignBits:2
+ ; CHECK-NEXT: %4:_ KnownBits:00??1000 SignBits:2
+ ; CHECK-NEXT: %5:_ KnownBits:00000??? SignBits:5
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 48
+ %2:_(s8) = G_CONSTANT i8 8
+ %3:_(s8) = G_AND %0, %1
+ %4:_(s8) = G_OR %3, %2
+ %5:_(s8) = G_CTLS %4
+...
+
+---
+name: CTLSNoKnown8
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSNoKnown8
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000??? SignBits:13
+ %0:_(s8) = COPY $b0
+ %1:_(s16) = G_CTLS %0
+...
+
+---
+name: CTLSNoKnown16
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSNoKnown16
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:000000000000???? SignBits:12
+ %0:_(s16) = COPY $h0
+ %1:_(s16) = G_CTLS %0
+...
+
+---
+name: CTLSNoKnown32
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSNoKnown32
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000000000????? SignBits:11
+ %0:_(s32) = COPY $s0
+ %1:_(s16) = G_CTLS %0
+...
+
+
+---
+name: CTLSHalfKnown8
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSHalfKnown8
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000100 SignBits:5
+ ; CHECK-NEXT: %2:_ KnownBits:00000?00 SignBits:5
+ ; CHECK-NEXT: %3:_ KnownBits:000001?? SignBits:5
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 4
+ %2:_(s8) = G_AND %0, %1
+ %3:_(s8) = G_CTLS %2
+...
+
+---
+name: CTLSHalfKnown16
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSHalfKnown16
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000001000000 SignBits:9
+ ; CHECK-NEXT: %2:_ KnownBits:000000000?000000 SignBits:9
+ ; CHECK-NEXT: %3:_ KnownBits:0000000000001??? SignBits:12
+ %0:_(s16) = COPY $h0
+ %1:_(s16) = G_CONSTANT i16 64
+ %2:_(s16) = G_AND %0, %1
+ %3:_(s16) = G_CTLS %2
+...
+
+---
+name: CTLSHalfConst8Zero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSHalfConst8Zero
+ ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8
+ ; CHECK-NEXT: %1:_ KnownBits:00000111 SignBits:5
+ %0:_(s8) = G_CONSTANT i8 0
+ %1:_(s8) = G_CTLS %0
+...
+
+---
+name: CTLSHalfConst8NonZero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSHalfConst8NonZero
+ ; CHECK-NEXT: %0:_ KnownBits:00111000 SignBits:2
+ ; CHECK-NEXT: %1:_ KnownBits:00000001 SignBits:7
+ %0:_(s8) = G_CONSTANT i8 312
+ %1:_(s8) = G_CTLS %0
+...
+
+
+---
+name: CTLSHalfConst16Zero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSHalfConst16Zero
+ ; CHECK-NEXT: %0:_ KnownBits:0000000000000000 SignBits:16
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000001111 SignBits:12
+ %0:_(s16) = G_CONSTANT i16 0
+ %1:_(s16) = G_CTLS %0
+...
+
+---
+name: CTLSHalfConst16NonZero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSHalfConst16NonZero
+ ; CHECK-NEXT: %0:_ KnownBits:0101111011111000 SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000000 SignBits:16
+ %0:_(s16) = G_CONSTANT i16 24312
+ %1:_(s16) = G_CTLS %0
+...
+
+---
+name: VectorCTLSConst16Zero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCTLSConst16Zero
+ ; CHECK-NEXT: %0:_ KnownBits:0000000000000000 SignBits:16
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000000 SignBits:16
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000001111 SignBits:12
+ %0:_(s16) = G_CONSTANT i16 0
+ %1:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0
+ %3:_(<4 x s16>) = G_CTLS %1
+...
+
+---
+name: VectorCTLSConst16NonZero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCTLSConst16NonZero
+ ; CHECK-NEXT: %0:_ KnownBits:0000010011011010 SignBits:5
+ ; CHECK-NEXT: %1:_ KnownBits:0000010011011010 SignBits:5
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000100 SignBits:13
+ %0:_(s16) = G_CONSTANT i16 1242
+ %1:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0
+ %3:_(<4 x s16>) = G_CTLS %1
+...
+
+---
+name: VectorCTLSConst8NonZero
+body: |
+ bb.1:
+ ; NOTE: Currently we do not estimate max sign bits in KnownBits,
+ ; so while we could get more accurate here, this is currently
+ ; not possible.
+ ; CHECK-LABEL: name: @VectorCTLSConst8NonZero
+ ; CHECK-NEXT: %0:_ KnownBits:00010111 SignBits:3
+ ; CHECK-NEXT: %1:_ KnownBits:00110010 SignBits:2
+ ; CHECK-NEXT: %2:_ KnownBits:00?10?1? SignBits:2
+ ; CHECK-NEXT: %3:_ KnownBits:00000??? SignBits:5
+ %0:_(s8) = G_CONSTANT i8 23
+ %1:_(s8) = G_CONSTANT i8 50
+ %2:_(<4 x s8>) = G_BUILD_VECTOR %0, %1, %0, %1
+ %3:_(<4 x s8>) = G_CTLS %2
+...
+
+---
+name: CTLSLargeEnough
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSLargeEnough
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000011000 SignBits:11
+ ; CHECK-NEXT: %2:_ KnownBits:00000000000????? SignBits:11
+ ; CHECK-NEXT: %3:_ KnownBits:0000000000001??? SignBits:12
+ %0:_(s16) = COPY $h0
+ %1:_(s16) = G_CONSTANT i16 24
+ %2:_(s16) = G_ASHR %1, %0
+ %3:_(s16) = G_CTLS %2
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
index 2f4b0ac3653a0..0983014101070 100644
--- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -122,10 +122,16 @@ define i32 @cls_i32_knownbits(i32 %x) {
; There are at least 16 redundant sign bits so we don't need an ori after the cls.
define i32 @cls_i32_knownbits_2(i16 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_2:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cls_i32_knownbits_2:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cls w0, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i32_knownbits_2:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: cls w8, w0
+; CHECK-GI-NEXT: orr w0, w8, #0x10
+; CHECK-GI-NEXT: ret
%sext = sext i16 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
@@ -166,10 +172,16 @@ define i64 @cls_i64_not_32(i64 %x) {
; There are at least 24 redundant sign bits so we don't need an ori after the clsw.
define i32 @cls_i32_knownbits_3(i8 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_3:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cls_i32_knownbits_3:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cls w0, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i32_knownbits_3:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: cls w8, w0
+; CHECK-GI-NEXT: orr w0, w8, #0x18
+; CHECK-GI-NEXT: ret
%sext = sext i8 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
diff --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
index e0cf26356da94..95410b52bdca0 100644
--- a/llvm/test/CodeGen/AArch64/cls.ll
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; @llvm.aarch64.cls must be directly translated into the 'cls' instruction
@@ -56,10 +57,16 @@ define i32 @cls_i32_knownbits(i32 %x) {
; There are at least 16 redundant sign bits so we don't need an ori after the cls.
define i32 @cls_i32_knownbits_2(i16 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cls_i32_knownbits_2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cls w0, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i32_knownbits_2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cls w8, w0
+; CHECK-GI-NEXT: orr w0, w8, #0x10
+; CHECK-GI-NEXT: ret
%sext = sext i16 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
@@ -72,12 +79,22 @@ define i32 @cls_i32_knownbits_2(i16 signext %x) {
; Check that the range max in the G_CTLS known-bits computation
; is not set to 32
define i64 @cls_i64_not_32(i64 %x) {
-; CHECK-LABEL: cls_i64_not_32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: asr x8, x0, #16
-; CHECK-NEXT: cls x8, x8
-; CHECK-NEXT: orr x0, x8, #0x10
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cls_i64_not_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: asr x8, x0, #16
+; CHECK-SD-NEXT: cls x8, x8
+; CHECK-SD-NEXT: orr x0, x8, #0x10
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i64_not_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: asr x8, x0, #63
+; CHECK-GI-NEXT: eor x8, x8, x0, asr #16
+; CHECK-GI-NEXT: lsl x8, x8, #1
+; CHECK-GI-NEXT: orr x8, x8, #0x1
+; CHECK-GI-NEXT: clz x8, x8
+; CHECK-GI-NEXT: orr x0, x8, #0x10
+; CHECK-GI-NEXT: ret
%val = ashr i64 %x, 16
%a = ashr i64 %val, 63
%b = xor i64 %val, %a
@@ -90,10 +107,16 @@ define i64 @cls_i64_not_32(i64 %x) {
; There are at least 24 redundant sign bits so we don't need an ori after the clsw.
define i32 @cls_i32_knownbits_3(i8 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cls_i32_knownbits_3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cls w0, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i32_knownbits_3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cls w8, w0
+; CHECK-GI-NEXT: orr w0, w8, #0x18
+; CHECK-GI-NEXT: ret
%sext = sext i8 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
@@ -125,12 +148,21 @@ define i32 @cls_i32_knownbits_4(i32 signext %x) {
; Negative test. Check that the number of sign bits is not
; overestimated. If it is, the orr disappears.
define i32 @cls_i32_knownbits_no_overestimate(i32 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_no_overestimate:
-; CHECK: // %bb.0:
-; CHECK-NEXT: asr w8, w0, #15
-; CHECK-NEXT: cls w8, w8
-; CHECK-NEXT: orr w0, w8, #0x10
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cls_i32_knownbits_no_overestimate:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: asr w8, w0, #15
+; CHECK-SD-NEXT: cls w8, w8
+; CHECK-SD-NEXT: orr w0, w8, #0x10
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cls_i32_knownbits_no_overestimate:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: asr w8, w0, #31
+; CHECK-GI-NEXT: eor w8, w8, w0, asr #15
+; CHECK-GI-NEXT: clz w8, w8
+; CHECK-GI-NEXT: sub w8, w8, #1
+; CHECK-GI-NEXT: orr w0, w8, #0x10
+; CHECK-GI-NEXT: ret
%ashr = ashr i32 %x, 15
%a = ashr i32 %ashr, 31
%b = xor i32 %ashr, %a
>From 94ab16bf89d6c7e0103efa304580448e4458bf3c Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 28 Jan 2026 18:36:07 +0000
Subject: [PATCH 5/9] separate out intrinsic tests for cls
---
llvm/test/CodeGen/AArch64/cls.ll | 21 ------------------
llvm/test/CodeGen/AArch64/intrinsic-cls.ll | 25 ++++++++++++++++++++++
2 files changed, 25 insertions(+), 21 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/intrinsic-cls.ll
diff --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
index 95410b52bdca0..49978a2a108a3 100644
--- a/llvm/test/CodeGen/AArch64/cls.ll
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -2,27 +2,6 @@
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; @llvm.aarch64.cls must be directly translated into the 'cls' instruction
-
-define i32 @cls(i32 %t) {
-; CHECK-LABEL: cls:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
- %cls.i = call i32 @llvm.aarch64.cls(i32 %t)
- ret i32 %cls.i
-}
-
-define i32 @cls64(i64 %t) {
-; CHECK-LABEL: cls64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cls x0, x0
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
- %cls.i = call i32 @llvm.aarch64.cls64(i64 %t)
- ret i32 %cls.i
-}
-
declare i32 @llvm.aarch64.cls(i32) nounwind
declare i32 @llvm.aarch64.cls64(i64) nounwind
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cls.ll b/llvm/test/CodeGen/AArch64/intrinsic-cls.ll
new file mode 100644
index 0000000000000..fd37786ab6f6d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cls.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+
+; @llvm.aarch64.cls must be directly translated into the 'cls' instruction
+
+define i32 @cls(i32 %t) {
+; CHECK-LABEL: cls:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %cls.i = call i32 @llvm.aarch64.cls(i32 %t)
+ ret i32 %cls.i
+}
+
+define i32 @cls64(i64 %t) {
+; CHECK-LABEL: cls64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls x0, x0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %cls.i = call i32 @llvm.aarch64.cls64(i64 %t)
+ ret i32 %cls.i
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-SD: {{.*}}
>From 8aafe5b8f34af140a63e7b680b31c8441413df03 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Wed, 28 Jan 2026 18:44:51 +0000
Subject: [PATCH 6/9] remove unused random imports
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 919ddc10790f8..818e5c202525a 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -13,8 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
@@ -23,7 +21,6 @@
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
>From 6800b31ee1c5c4e1e61b1ecbba0d077bdcddc6e2 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 29 Jan 2026 17:22:27 +0000
Subject: [PATCH 7/9] resolve comments
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 13 +--
.../AArch64/GlobalISel/knownbits-ctls.mir | 107 +++++++++---------
llvm/test/CodeGen/AArch64/arm64-clrsb.ll | 28 ++---
llvm/test/CodeGen/AArch64/cls.ll | 51 +++++----
llvm/test/CodeGen/AArch64/intrinsic-cls.ll | 25 ----
5 files changed, 95 insertions(+), 129 deletions(-)
delete mode 100644 llvm/test/CodeGen/AArch64/intrinsic-cls.ll
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 818e5c202525a..eb0fc236906a0 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -682,19 +682,10 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CTLS: {
- KnownBits SrcOpKnown;
auto Reg = MI.getOperand(1).getReg();
+ unsigned MinRedundantSignBits = computeNumSignBits(Reg, Depth + 1) - 1;
- computeKnownBitsImpl(Reg, SrcOpKnown, DemandedElts, Depth + 1);
- unsigned MinRedundantSignBits = SrcOpKnown.countMinSignBits() - 1;
-
- if (SrcOpKnown.isConstant()) {
- Known = KnownBits::makeConstant(APInt(BitWidth, MinRedundantSignBits));
- break;
- }
-
- unsigned MaxUpperRedundantSignBits =
- MRI.getType(Reg).getScalarSizeInBits() - 1;
+ unsigned MaxUpperRedundantSignBits = MRI.getType(Reg).getScalarSizeInBits();
ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
APInt(BitWidth, MaxUpperRedundantSignBits));
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir
index 2c13de467ce02..4bb2b3d5bc205 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ctls.mir
@@ -98,18 +98,6 @@ body: |
%1:_(s8) = G_CTLS %0
...
----
-name: CTLSHalfConst8NonZero
-body: |
- bb.1:
- ; CHECK-LABEL: name: @CTLSHalfConst8NonZero
- ; CHECK-NEXT: %0:_ KnownBits:00111000 SignBits:2
- ; CHECK-NEXT: %1:_ KnownBits:00000001 SignBits:7
- %0:_(s8) = G_CONSTANT i8 312
- %1:_(s8) = G_CTLS %0
-...
-
-
---
name: CTLSHalfConst16Zero
body: |
@@ -121,17 +109,6 @@ body: |
%1:_(s16) = G_CTLS %0
...
----
-name: CTLSHalfConst16NonZero
-body: |
- bb.1:
- ; CHECK-LABEL: name: @CTLSHalfConst16NonZero
- ; CHECK-NEXT: %0:_ KnownBits:0101111011111000 SignBits:1
- ; CHECK-NEXT: %1:_ KnownBits:0000000000000000 SignBits:16
- %0:_(s16) = G_CONSTANT i16 24312
- %1:_(s16) = G_CTLS %0
-...
-
---
name: VectorCTLSConst16Zero
body: |
@@ -146,34 +123,45 @@ body: |
...
---
-name: VectorCTLSConst16NonZero
+name: VectorCTLSNoneKnown
body: |
bb.1:
- ; CHECK-LABEL: name: @VectorCTLSConst16NonZero
- ; CHECK-NEXT: %0:_ KnownBits:0000010011011010 SignBits:5
- ; CHECK-NEXT: %1:_ KnownBits:0000010011011010 SignBits:5
- ; CHECK-NEXT: %2:_ KnownBits:0000000000000100 SignBits:13
- %0:_(s16) = G_CONSTANT i16 1242
- %1:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0
- %3:_(<4 x s16>) = G_CTLS %1
+ ; CHECK-LABEL: name: @VectorCTLSNoneKnown
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000001000 SignBits:12
+ ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:9
+ ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %5:_ KnownBits:000000000000???? SignBits:12
+ %0:_(s16) = COPY $h0
+ %1:_(s16) = G_CONSTANT i16 8
+ %2:_(s16) = G_ASHR %0, %1
+ %3:_(s16) = COPY $h1
+ %4:_(<4 x s16>) = G_BUILD_VECTOR %2, %3, %2, %3
+ %5:_(<4 x s16>) = G_CTLS %4
...
---
-name: VectorCTLSConst8NonZero
+name: VectorCTLSHighBitKnown
body: |
bb.1:
- ; NOTE: Currently we do not estimate max sign bits in KnownBits,
- ; so while we could get more accurate here, this is currently
- ; not possible.
- ; CHECK-LABEL: name: @VectorCTLSConst8NonZero
- ; CHECK-NEXT: %0:_ KnownBits:00010111 SignBits:3
- ; CHECK-NEXT: %1:_ KnownBits:00110010 SignBits:2
- ; CHECK-NEXT: %2:_ KnownBits:00?10?1? SignBits:2
- ; CHECK-NEXT: %3:_ KnownBits:00000??? SignBits:5
- %0:_(s8) = G_CONSTANT i8 23
- %1:_(s8) = G_CONSTANT i8 50
- %2:_(<4 x s8>) = G_BUILD_VECTOR %0, %1, %0, %1
- %3:_(<4 x s8>) = G_CTLS %2
+ ; CHECK-LABEL: name: @VectorCTLSHighBitKnown
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000001000 SignBits:12
+ ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:9
+ ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %4:_ KnownBits:0000000000001010 SignBits:12
+ ; CHECK-NEXT: %5:_ KnownBits:000000000000???? SignBits:12
+ ; CHECK-NEXT: %6:_ KnownBits:???????????????? SignBits:9
+ ; CHECK-NEXT: %7:_ KnownBits:0000000000001??? SignBits:12
+ %0:_(s16) = COPY $h0
+ %1:_(s16) = G_CONSTANT i16 8
+ %2:_(s16) = G_ASHR %0, %1
+ %3:_(s16) = COPY $h1
+ %4:_(s16) = G_CONSTANT i16 10
+    %5:_(s16) = G_ASHR %4, %3
+ %6:_(<4 x s16>) = G_BUILD_VECTOR %2, %5, %2, %4
+ %7:_(<4 x s16>) = G_CTLS %6
...
---
@@ -181,12 +169,27 @@ name: CTLSLargeEnough
body: |
bb.1:
; CHECK-LABEL: name: @CTLSLargeEnough
- ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
- ; CHECK-NEXT: %1:_ KnownBits:0000000000011000 SignBits:11
- ; CHECK-NEXT: %2:_ KnownBits:00000000000????? SignBits:11
- ; CHECK-NEXT: %3:_ KnownBits:0000000000001??? SignBits:12
- %0:_(s16) = COPY $h0
- %1:_(s16) = G_CONSTANT i16 24
- %2:_(s16) = G_ASHR %1, %0
- %3:_(s16) = G_CTLS %2
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000000000000000000000000010000 SignBits:27
+ ; CHECK-NEXT: %2:_ KnownBits:???????????????????????????????? SignBits:17
+ ; CHECK-NEXT: %3:_ KnownBits:0000000000000000000000000001???? SignBits:27
+ %0:_(s32) = COPY $s0
+ %1:_(s32) = G_CONSTANT i32 16
+ %2:_(s32) = G_ASHR %0, %1
+ %3:_(s32) = G_CTLS %2
+...
+
+---
+name: CTLSTooSmallToTrigger
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CTLSTooSmallToTrigger
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000000000000000000000000001111 SignBits:28
+ ; CHECK-NEXT: %2:_ KnownBits:???????????????????????????????? SignBits:16
+ ; CHECK-NEXT: %3:_ KnownBits:000000000000000000000000000????? SignBits:27
+ %0:_(s32) = COPY $s0
+ %1:_(s32) = G_CONSTANT i32 15
+ %2:_(s32) = G_ASHR %0, %1
+ %3:_(s32) = G_CTLS %2
...
diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
index 0983014101070..2f4b0ac3653a0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -122,16 +122,10 @@ define i32 @cls_i32_knownbits(i32 %x) {
; There are at least 16 redundant sign bits so we don't need an ori after the cls.
define i32 @cls_i32_knownbits_2(i16 signext %x) {
-; CHECK-SD-LABEL: cls_i32_knownbits_2:
-; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: cls w0, w0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cls_i32_knownbits_2:
-; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: cls w8, w0
-; CHECK-GI-NEXT: orr w0, w8, #0x10
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
%sext = sext i16 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
@@ -172,16 +166,10 @@ define i64 @cls_i64_not_32(i64 %x) {
; There are at least 24 redundant sign bits so we don't need an ori after the clsw.
define i32 @cls_i32_knownbits_3(i8 signext %x) {
-; CHECK-SD-LABEL: cls_i32_knownbits_3:
-; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: cls w0, w0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cls_i32_knownbits_3:
-; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: cls w8, w0
-; CHECK-GI-NEXT: orr w0, w8, #0x18
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
%sext = sext i8 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
diff --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
index 49978a2a108a3..53b05d1db24b7 100644
--- a/llvm/test/CodeGen/AArch64/cls.ll
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -1,6 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; @llvm.aarch64.cls must be directly translated into the 'cls' instruction
+
+define i32 @cls(i32 %t) {
+; CHECK-LABEL: cls:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
+ %cls.i = call i32 @llvm.aarch64.cls(i32 %t)
+ ret i32 %cls.i
+}
+
+define i32 @cls64(i64 %t) {
+; CHECK-LABEL: cls64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls x0, x0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %cls.i = call i32 @llvm.aarch64.cls64(i64 %t)
+ ret i32 %cls.i
+}
declare i32 @llvm.aarch64.cls(i32) nounwind
declare i32 @llvm.aarch64.cls64(i64) nounwind
@@ -36,16 +57,10 @@ define i32 @cls_i32_knownbits(i32 %x) {
; There are at least 16 redundant sign bits so we don't need an ori after the cls.
define i32 @cls_i32_knownbits_2(i16 signext %x) {
-; CHECK-SD-LABEL: cls_i32_knownbits_2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cls w0, w0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cls_i32_knownbits_2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cls w8, w0
-; CHECK-GI-NEXT: orr w0, w8, #0x10
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cls_i32_knownbits_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
%sext = sext i16 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
@@ -86,16 +101,10 @@ define i64 @cls_i64_not_32(i64 %x) {
; There are at least 24 redundant sign bits so we don't need an ori after the clsw.
define i32 @cls_i32_knownbits_3(i8 signext %x) {
-; CHECK-SD-LABEL: cls_i32_knownbits_3:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cls w0, w0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cls_i32_knownbits_3:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cls w8, w0
-; CHECK-GI-NEXT: orr w0, w8, #0x18
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cls_i32_knownbits_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cls w0, w0
+; CHECK-NEXT: ret
%sext = sext i8 %x to i32
%a = ashr i32 %sext, 31
%b = xor i32 %sext, %a
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cls.ll b/llvm/test/CodeGen/AArch64/intrinsic-cls.ll
deleted file mode 100644
index fd37786ab6f6d..0000000000000
--- a/llvm/test/CodeGen/AArch64/intrinsic-cls.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-
-; @llvm.aarch64.cls must be directly translated into the 'cls' instruction
-
-define i32 @cls(i32 %t) {
-; CHECK-LABEL: cls:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
- %cls.i = call i32 @llvm.aarch64.cls(i32 %t)
- ret i32 %cls.i
-}
-
-define i32 @cls64(i64 %t) {
-; CHECK-LABEL: cls64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cls x0, x0
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
- %cls.i = call i32 @llvm.aarch64.cls64(i64 %t)
- ret i32 %cls.i
-}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-SD: {{.*}}
>From e9e1d3914768354c439869fadee03f5f8d18e1d5 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 29 Jan 2026 20:19:36 +0000
Subject: [PATCH 8/9] remove duplicate tests
---
llvm/test/CodeGen/AArch64/arm64-clrsb.ll | 137 +----------------------
1 file changed, 3 insertions(+), 134 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
index 2f4b0ac3653a0..4597c6178e2ba 100644
--- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -91,137 +91,6 @@ entry:
ret i64 %0
}
-define i8 @cls_i8(i8 %x) {
-; CHECK-LABEL: cls_i8:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cls w8, w8
-; CHECK-NEXT: sub w0, w8, #24
-; CHECK-NEXT: ret
-
- %a = ashr i8 %x, 7
- %b = xor i8 %x, %a
- %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false)
- %d = sub i8 %c, 1
- ret i8 %d
-}
-
-; The result is in the range [1-31], so we don't need an andi after the cls.
-define i32 @cls_i32_knownbits(i32 %x) {
-; CHECK-LABEL: cls_i32_knownbits:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
- %a = ashr i32 %x, 31
- %b = xor i32 %x, %a
- %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
- %d = sub i32 %c, 1
- %e = and i32 %d, 31
- ret i32 %e
-}
-
-; There are at least 16 redundant sign bits so we don't need an ori after the cls.
-define i32 @cls_i32_knownbits_2(i16 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_2:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
- %sext = sext i16 %x to i32
- %a = ashr i32 %sext, 31
- %b = xor i32 %sext, %a
- %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
- %d = sub i32 %c, 1
- %e = or i32 %d, 16
- ret i32 %e
-}
-
-; Check that the range max in ctls cls knownbits
-; is not set to 32
-define i64 @cls_i64_not_32(i64 %x) {
-; CHECK-SD-LABEL: cls_i64_not_32:
-; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: asr x8, x0, #16
-; CHECK-SD-NEXT: cls x8, x8
-; CHECK-SD-NEXT: orr x0, x8, #0x10
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cls_i64_not_32:
-; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: asr x8, x0, #63
-; CHECK-GI-NEXT: eor x8, x8, x0, asr #16
-; CHECK-GI-NEXT: lsl x8, x8, #1
-; CHECK-GI-NEXT: orr x8, x8, #0x1
-; CHECK-GI-NEXT: clz x8, x8
-; CHECK-GI-NEXT: orr x0, x8, #0x10
-; CHECK-GI-NEXT: ret
- %val = ashr i64 %x, 16
- %a = ashr i64 %val, 63
- %b = xor i64 %val, %a
- %c = shl i64 %b, 1
- %d = or i64 %c, 1
- %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true)
- %f = or i64 %e, 16
- ret i64 %f
-}
-
-; There are at least 24 redundant sign bits so we don't need an ori after the clsw.
-define i32 @cls_i32_knownbits_3(i8 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_3:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: cls w0, w0
-; CHECK-NEXT: ret
- %sext = sext i8 %x to i32
- %a = ashr i32 %sext, 31
- %b = xor i32 %sext, %a
- %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
- %d = sub i32 %c, 1
- %e = or i32 %d, 24
- ret i32 %e
-}
-
-; Negative test. We only know there is at least 1 redundant sign bit. We can't
-; remove the ori.
-define i32 @cls_i32_knownbits_4(i32 signext %x) {
-; CHECK-LABEL: cls_i32_knownbits_4:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: sbfx w8, w0, #0, #31
-; CHECK-NEXT: cls w8, w8
-; CHECK-NEXT: orr w0, w8, #0x1
-; CHECK-NEXT: ret
- %shl = shl i32 %x, 1
- %ashr = ashr i32 %shl, 1
- %a = ashr i32 %ashr, 31
- %b = xor i32 %ashr, %a
- %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
- %d = sub i32 %c, 1
- %e = or i32 %d, 1
- ret i32 %e
- }
-
-; Negative test. Check that the number of sign bits is not
-; overestimated. If it is, the orr disappears.
-define i32 @cls_i32_knownbits_no_overestimate(i32 signext %x) {
-; CHECK-SD-LABEL: cls_i32_knownbits_no_overestimate:
-; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: asr w8, w0, #15
-; CHECK-SD-NEXT: cls w8, w8
-; CHECK-SD-NEXT: orr w0, w8, #0x10
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cls_i32_knownbits_no_overestimate:
-; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: asr w8, w0, #31
-; CHECK-GI-NEXT: eor w8, w8, w0, asr #15
-; CHECK-GI-NEXT: clz w8, w8
-; CHECK-GI-NEXT: sub w8, w8, #1
-; CHECK-GI-NEXT: orr w0, w8, #0x10
-; CHECK-GI-NEXT: ret
- %ashr = ashr i32 %x, 15
- %a = ashr i32 %ashr, 31
- %b = xor i32 %ashr, %a
- %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false)
- %d = sub i32 %c, 1
- %e = or i32 %d, 16
- ret i32 %e
- }
-
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
>From 0bd44dcf5f96c66a30aa4d5118bd19755800d183 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <arsenm2 at gmail.com>
Date: Mon, 9 Feb 2026 10:04:00 +0100
Subject: [PATCH 9/9] No auto
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index eb0fc236906a0..4468b33520157 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -682,7 +682,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CTLS: {
- auto Reg = MI.getOperand(1).getReg();
+ Register Reg = MI.getOperand(1).getReg();
unsigned MinRedundantSignBits = computeNumSignBits(Reg, Depth + 1) - 1;
unsigned MaxUpperRedundantSignBits = MRI.getType(Reg).getScalarSizeInBits();
More information about the llvm-commits
mailing list