[llvm] [GlobalISel] Add G_ABS computeKnownBits (PR #154413)
Pragyansh Chaturvedi via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 09:54:55 PDT 2025
https://github.com/r41k0u updated https://github.com/llvm/llvm-project/pull/154413
>From 519caea1eae999e9a138f2618e495ba85c4bb32e Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Wed, 20 Aug 2025 00:45:25 +0530
Subject: [PATCH 01/14] [GlobalISel] Add G_ABS computeKnownBits
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 8 +++++++
.../AArch64/GlobalISel/knownbits-abs.mir | 23 +++++++++++++++++++
2 files changed, 31 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 9b4c103763d74..3f6813e52a1cc 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -675,6 +675,14 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
}
break;
}
+ case TargetOpcode::G_ABS: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known = Known.abs();
+ Known.Zero.setHighBits(computeNumSignBits(SrcReg, DemandedElts, Depth + 1) -
+ 1);
+ break;
+ }
}
LLVM_DEBUG(dumpResult(MI, Known, Depth));
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
new file mode 100644
index 0000000000000..c3675dc17e342
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
@@ -0,0 +1,23 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -filetype=null 2>&1 | FileCheck %s
+
+---
+name: Cst
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: @Cst
+ ; CHECK-NEXT: %0:_ KnownBits:00010011 SignBits:3
+ ; CHECK-NEXT: %1:_ KnownBits:00010011 SignBits:3
+ %0:_(s8) = G_CONSTANT i8 19
+ %1:_(s8) = G_ABS %0
+...
+---
+name: CstNeg
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: @CstNeg
+ ; CHECK-NEXT: %0:_ KnownBits:11101110 SignBits:3
+ ; CHECK-NEXT: %1:_ KnownBits:00010010 SignBits:3
+ %0:_(s8) = G_CONSTANT i8 238
+ %1:_(s8) = G_ABS %0
+...
>From 237f9966c5482f56e4ba65a41b8ac105a7fa3f7f Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Mon, 1 Sep 2025 02:55:42 +0530
Subject: [PATCH 02/14] [GlobalISel] Add more tests for G_ABS computeKnownBits
---
.../AArch64/GlobalISel/knownbits-abs.mir | 38 ++++++++++++++++++-
1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
index c3675dc17e342..1dd67bf301a32 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -filetype=null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=aarch64 -passes='print<gisel-value-tracking>' -filetype=null %s 2>&1 | FileCheck %s
---
name: Cst
@@ -21,3 +21,39 @@ body: |
%0:_(s8) = G_CONSTANT i8 238
%1:_(s8) = G_ABS %0
...
+---
+name: SplatVecCst
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: @SplatVecCst
+ ; CHECK-NEXT: %0:_ KnownBits:11111010 SignBits:5
+ ; CHECK-NEXT: %1:_ KnownBits:11111010 SignBits:5
+ ; CHECK-NEXT: %2:_ KnownBits:00000110 SignBits:5
+ %0:_(s8) = G_CONSTANT i8 250
+ %1:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR %0(s8)
+ %2:_(<vscale x 16 x s8>) = G_ABS %1
+...
+---
+name: VecCst
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: @VecCst
+ ; CHECK-NEXT: %0:_ KnownBits:00011001 SignBits:3
+ ; CHECK-NEXT: %1:_ KnownBits:11100001 SignBits:3
+ ; CHECK-NEXT: %2:_ KnownBits:?????001 SignBits:3
+ ; CHECK-NEXT: %3:_ KnownBits:0??????1 SignBits:1
+ %0:_(s8) = G_CONSTANT i8 25
+ %1:_(s8) = G_CONSTANT i8 225
+ %2:_(<2 x s8>) = G_BUILD_VECTOR %0:_(s8), %1:_(s8)
+ %3:_(<2 x s8>) = G_ABS %2
+...
+---
+name: ImplicitDef
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: @ImplicitDef
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1
+ %0:_(s8) = G_IMPLICIT_DEF
+ %1:_(s8) = G_ABS %0
+...
>From 6fe08a854fbfe1fbf270c3b0e24223ebe3d5c70a Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Mon, 1 Sep 2025 15:44:54 +0530
Subject: [PATCH 03/14] [GlobalISel] Add cache assertion and clearing to
GISelValueTracking::computeNumSignBits
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 3f6813e52a1cc..697fa019f5896 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -2058,7 +2058,8 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
- KnownBits Known = getKnownBits(R, DemandedElts, Depth);
+ KnownBits Known;
+ computeKnownBitsImpl(R, Known, DemandedElts, Depth + 1);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
Mask = Known.Zero;
@@ -2079,7 +2080,10 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
LLT Ty = MRI.getType(R);
APInt DemandedElts =
Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
- return computeNumSignBits(R, DemandedElts, Depth);
+ assert(ComputeKnownBitsCache.empty() && "Cache should be empty");
+ unsigned numSignBits = computeNumSignBits(R, DemandedElts, Depth);
+ ComputeKnownBitsCache.clear();
+ return numSignBits;
}
std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
@@ -2185,7 +2189,7 @@ GISelValueTrackingPrinterPass::run(MachineFunction &MF,
Register Reg = MO.getReg();
if (!MRI.getType(Reg).isValid())
continue;
- KnownBits Known = VTA.getKnownBits(Reg);
+ KnownBits Known = VTA.getKnownBits(Reg);
unsigned SignedBits = VTA.computeNumSignBits(Reg);
OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
<< '\n';
>From 7898992cbd51d1654e51afca52b27fe4301e9717 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Mon, 1 Sep 2025 15:49:12 +0530
Subject: [PATCH 04/14] [GlobalISel] Add sign extension tests for
knownbits_abs, update tests
---
.../AArch64/GlobalISel/knownbits-abs.mir | 26 ++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
index 1dd67bf301a32..b4ac62cd992cf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-abs.mir
@@ -41,7 +41,7 @@ body: |
; CHECK-NEXT: %0:_ KnownBits:00011001 SignBits:3
; CHECK-NEXT: %1:_ KnownBits:11100001 SignBits:3
; CHECK-NEXT: %2:_ KnownBits:?????001 SignBits:3
- ; CHECK-NEXT: %3:_ KnownBits:0??????1 SignBits:1
+ ; CHECK-NEXT: %3:_ KnownBits:00?????1 SignBits:2
%0:_(s8) = G_CONSTANT i8 25
%1:_(s8) = G_CONSTANT i8 225
%2:_(<2 x s8>) = G_BUILD_VECTOR %0:_(s8), %1:_(s8)
@@ -57,3 +57,27 @@ body: |
%0:_(s8) = G_IMPLICIT_DEF
%1:_(s8) = G_ABS %0
...
+---
+name: CstSext
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: @CstSext
+ ; CHECK-NEXT: %0:_ KnownBits:11000111 SignBits:2
+ ; CHECK-NEXT: %1:_ KnownBits:1111111111000111 SignBits:10
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000111001 SignBits:10
+ %0:_(s8) = G_CONSTANT i8 199
+ %1:_(s16) = G_SEXT %0
+ %2:_(s16) = G_ABS %1
+...
+---
+name: ImplicitDefSext
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: @ImplicitDefSext
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:9
+ ; CHECK-NEXT: %2:_ KnownBits:00000000???????? SignBits:8
+ %0:_(s8) = G_IMPLICIT_DEF
+ %1:_(s16) = G_SEXT %0
+ %2:_(s16) = G_ABS %1
+...
>From 90c15e872058a7eb3aa8324ff1cf618e68522692 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Mon, 1 Sep 2025 15:52:47 +0530
Subject: [PATCH 05/14] [GlobalISel] Fix formatting in
GISelValueTrackingPrinterPass::run
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 697fa019f5896..04a9d6ffa2494 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -2189,7 +2189,7 @@ GISelValueTrackingPrinterPass::run(MachineFunction &MF,
Register Reg = MO.getReg();
if (!MRI.getType(Reg).isValid())
continue;
- KnownBits Known = VTA.getKnownBits(Reg);
+ KnownBits Known = VTA.getKnownBits(Reg);
unsigned SignedBits = VTA.computeNumSignBits(Reg);
OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
<< '\n';
>From 4973773a1f5d1c9fb2dd06d58afbc21bcdaa1e9e Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Wed, 3 Sep 2025 12:18:05 +0530
Subject: [PATCH 06/14] [GlobalISel] Add computeNumSignBitsImpl to
GISelValueTracking
---
.../llvm/CodeGen/GlobalISel/GISelValueTracking.h | 4 ++++
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 16 +++++++++++-----
2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index 2db66ba9584a3..c5a3f39baec0f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -67,6 +67,10 @@ class LLVM_ABI GISelValueTracking : public GISelChangeObserver {
void computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts, unsigned Depth = 0);
+ virtual unsigned computeNumSignBitsImpl(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth = 0);
+
unsigned computeNumSignBits(Register R, const APInt &DemandedElts,
unsigned Depth = 0);
unsigned computeNumSignBits(Register R, unsigned Depth = 0);
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 04a9d6ffa2494..11e71602dd328 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -1764,7 +1764,7 @@ static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
CR.getSignedMax().getNumSignBits());
}
-unsigned GISelValueTracking::computeNumSignBits(Register R,
+unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
const APInt &DemandedElts,
unsigned Depth) {
MachineInstr &MI = *MRI.getVRegDef(R);
@@ -2076,14 +2076,20 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
return std::max(FirstAnswer, Mask.countl_one());
}
+unsigned GISelValueTracking::computeNumSignBits(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ assert(ComputeKnownBitsCache.empty() && "Cache should be empty");
+ unsigned NumSignBits = computeNumSignBitsImpl(R, DemandedElts, Depth);
+ ComputeKnownBitsCache.clear();
+ return NumSignBits;
+}
+
unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
LLT Ty = MRI.getType(R);
APInt DemandedElts =
Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
- assert(ComputeKnownBitsCache.empty() && "Cache should be empty");
- unsigned numSignBits = computeNumSignBits(R, DemandedElts, Depth);
- ComputeKnownBitsCache.clear();
- return numSignBits;
+ return computeNumSignBits(R, DemandedElts, Depth);
}
std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
>From 1a5237cb79025d70f42681f9ada66db2d50a6853 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Wed, 3 Sep 2025 13:28:09 +0530
Subject: [PATCH 07/14] [GlobalISel] Update aarch64-smull.ll test
---
llvm/test/CodeGen/AArch64/aarch64-smull.ll | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 6e5c666bdbc75..52cb13b1d9f30 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -249,10 +249,20 @@ define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind {
; CHECK-GI-NEXT: movi d0, #0x00ffff0000ffff
; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
+; CHECK-GI-NEXT: mov w8, v0.s[0]
+; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: ldr d0, [x1]
-; CHECK-GI-NEXT: smull v0.2d, v1.2s, v0.2s
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: fmov d1, x8
+; CHECK-GI-NEXT: fmov x11, d0
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: mov x9, v0.d[1]
+; CHECK-GI-NEXT: fmov x10, d1
+; CHECK-GI-NEXT: mov x8, v1.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
%load.A = load <2 x i16>, ptr %A
%load.B = load <2 x i32>, ptr %B
>From b1fa17db425364920e503940f7a7c07bf67b4264 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Thu, 4 Sep 2025 14:41:10 +0530
Subject: [PATCH 08/14] [GlobalISel] Replace relevant occurrences of
computeNumSignBits with computeNumSignBitsImpl
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 30 +++++++++----------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6 ++--
2 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 11e71602dd328..9cef9e28dd46e 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -679,7 +679,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Register SrcReg = MI.getOperand(1).getReg();
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
Known = Known.abs();
- Known.Zero.setHighBits(computeNumSignBits(SrcReg, DemandedElts, Depth + 1) -
+ Known.Zero.setHighBits(computeNumSignBitsImpl(SrcReg, DemandedElts, Depth + 1) -
1);
break;
}
@@ -1731,10 +1731,10 @@ unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
const APInt &DemandedElts,
unsigned Depth) {
// Test src1 first, since we canonicalize simpler expressions to the RHS.
- unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
+ unsigned Src1SignBits = computeNumSignBitsImpl(Src1, DemandedElts, Depth);
if (Src1SignBits == 1)
return 1;
- return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
+ return std::min(computeNumSignBitsImpl(Src0, DemandedElts, Depth), Src1SignBits);
}
/// Compute the known number of sign bits with attached range metadata in the
@@ -1796,7 +1796,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
MRI.getType(Src.getReg()).isValid()) {
// Don't increment Depth for this one since we didn't do any work.
- return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
+ return computeNumSignBitsImpl(Src.getReg(), DemandedElts, Depth);
}
return 1;
@@ -1805,7 +1805,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
Register Src = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(Src);
unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
- return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
+ return computeNumSignBitsImpl(Src, DemandedElts, Depth + 1) + Tmp;
}
case TargetOpcode::G_ASSERT_SEXT:
case TargetOpcode::G_SEXT_INREG: {
@@ -1813,7 +1813,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
Register Src = MI.getOperand(1).getReg();
unsigned SrcBits = MI.getOperand(2).getImm();
unsigned InRegBits = TyBits - SrcBits + 1;
- return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
+ return std::max(computeNumSignBitsImpl(Src, DemandedElts, Depth + 1),
InRegBits);
}
case TargetOpcode::G_LOAD: {
@@ -1858,11 +1858,11 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
case TargetOpcode::G_XOR: {
Register Src1 = MI.getOperand(1).getReg();
unsigned Src1NumSignBits =
- computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
if (Src1NumSignBits != 1) {
Register Src2 = MI.getOperand(2).getReg();
unsigned Src2NumSignBits =
- computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ computeNumSignBitsImpl(Src2, DemandedElts, Depth + 1);
FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
}
break;
@@ -1870,7 +1870,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
case TargetOpcode::G_ASHR: {
Register Src1 = MI.getOperand(1).getReg();
Register Src2 = MI.getOperand(2).getReg();
- FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ FirstAnswer = computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
break;
@@ -1920,7 +1920,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
// Check if the sign bits of source go down as far as the truncated value.
unsigned DstTyBits = DstTy.getScalarSizeInBits();
unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
- unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
+ unsigned NumSrcSignBits = computeNumSignBitsImpl(Src, DemandedElts, Depth + 1);
if (NumSrcSignBits > (NumSrcBits - DstTyBits))
return NumSrcSignBits - (NumSrcBits - DstTyBits);
break;
@@ -1980,7 +1980,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
continue;
unsigned Tmp2 =
- computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
+ computeNumSignBitsImpl(MO.getReg(), SingleDemandedElt, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
// If we don't know any bits, early out.
@@ -2002,7 +2002,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
if (!DemandedSub)
continue;
- unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);
+ unsigned Tmp2 = computeNumSignBitsImpl(MO.getReg(), DemandedSub, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
@@ -2023,13 +2023,13 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
return 1;
if (!!DemandedLHS)
- FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
+ FirstAnswer = computeNumSignBitsImpl(Src1, DemandedLHS, Depth + 1);
// If we don't know anything, early out and try computeKnownBits fall-back.
if (FirstAnswer == 1)
break;
if (!!DemandedRHS) {
unsigned Tmp2 =
- computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
+ computeNumSignBitsImpl(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
}
break;
@@ -2037,7 +2037,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
case TargetOpcode::G_SPLAT_VECTOR: {
// Check if the sign bits of source go down as far as the truncated value.
Register Src = MI.getOperand(1).getReg();
- unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
+ unsigned NumSrcSignBits = computeNumSignBitsImpl(Src, APInt(1, 1), Depth + 1);
unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
if (NumSrcSignBits > (NumSrcBits - TyBits))
return NumSrcSignBits - (NumSrcBits - TyBits);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f069b591eb315..a7eafb94596ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6139,13 +6139,13 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
case AMDGPU::G_AMDGPU_SMED3:
case AMDGPU::G_AMDGPU_UMED3: {
auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
- unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ unsigned Tmp2 = Analysis.computeNumSignBitsImpl(Src2, DemandedElts, Depth + 1);
if (Tmp2 == 1)
return 1;
- unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ unsigned Tmp1 = Analysis.computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
if (Tmp1 == 1)
return 1;
- unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);
+ unsigned Tmp0 = Analysis.computeNumSignBitsImpl(Src0, DemandedElts, Depth + 1);
if (Tmp0 == 1)
return 1;
return std::min({Tmp0, Tmp1, Tmp2});
>From 6729b0fc99a39c57c67d5b826abc7a64ab51165a Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Fri, 5 Sep 2025 04:13:19 +0530
Subject: [PATCH 09/14] [GlobalISel] Fix depth for calling computeKnownBitsImpl
in computeNumSignBitsImpl
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 2 +-
llvm/test/CodeGen/AArch64/aarch64-smull.ll | 16 +++-------------
2 files changed, 4 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 9cef9e28dd46e..0cba2d7f89fca 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -2059,7 +2059,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
KnownBits Known;
- computeKnownBitsImpl(R, Known, DemandedElts, Depth + 1);
+ computeKnownBitsImpl(R, Known, DemandedElts, Depth);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
Mask = Known.Zero;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 52cb13b1d9f30..6e5c666bdbc75 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -249,20 +249,10 @@ define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind {
; CHECK-GI-NEXT: movi d0, #0x00ffff0000ffff
; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: mov w8, v0.s[0]
-; CHECK-GI-NEXT: mov w9, v0.s[1]
+; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-GI-NEXT: ldr d0, [x1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: fmov d1, x8
-; CHECK-GI-NEXT: fmov x11, d0
-; CHECK-GI-NEXT: mov v1.d[1], x9
-; CHECK-GI-NEXT: mov x9, v0.d[1]
-; CHECK-GI-NEXT: fmov x10, d1
-; CHECK-GI-NEXT: mov x8, v1.d[1]
-; CHECK-GI-NEXT: mul x10, x10, x11
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: fmov d0, x10
-; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: smull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%load.A = load <2 x i16>, ptr %A
%load.B = load <2 x i32>, ptr %B
>From bcd839333858f911b789aaae576ab0eb55a5bc47 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Fri, 5 Sep 2025 12:38:07 +0530
Subject: [PATCH 10/14] [GlobalISel] clang-format
---
.../CodeGen/GlobalISel/GISelValueTracking.h | 3 +--
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 24 +++++++++++--------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 9 ++++---
3 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index c5a3f39baec0f..a9f107bf8c5e2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -67,8 +67,7 @@ class LLVM_ABI GISelValueTracking : public GISelChangeObserver {
void computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts, unsigned Depth = 0);
- virtual unsigned computeNumSignBitsImpl(Register R,
- const APInt &DemandedElts,
+ virtual unsigned computeNumSignBitsImpl(Register R, const APInt &DemandedElts,
unsigned Depth = 0);
unsigned computeNumSignBits(Register R, const APInt &DemandedElts,
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 0cba2d7f89fca..2712fd99173e7 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -679,8 +679,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Register SrcReg = MI.getOperand(1).getReg();
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
Known = Known.abs();
- Known.Zero.setHighBits(computeNumSignBitsImpl(SrcReg, DemandedElts, Depth + 1) -
- 1);
+ Known.Zero.setHighBits(
+ computeNumSignBitsImpl(SrcReg, DemandedElts, Depth + 1) - 1);
break;
}
}
@@ -1734,7 +1734,8 @@ unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
unsigned Src1SignBits = computeNumSignBitsImpl(Src1, DemandedElts, Depth);
if (Src1SignBits == 1)
return 1;
- return std::min(computeNumSignBitsImpl(Src0, DemandedElts, Depth), Src1SignBits);
+ return std::min(computeNumSignBitsImpl(Src0, DemandedElts, Depth),
+ Src1SignBits);
}
/// Compute the known number of sign bits with attached range metadata in the
@@ -1765,8 +1766,8 @@ static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
}
unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
- const APInt &DemandedElts,
- unsigned Depth) {
+ const APInt &DemandedElts,
+ unsigned Depth) {
MachineInstr &MI = *MRI.getVRegDef(R);
unsigned Opcode = MI.getOpcode();
@@ -1920,7 +1921,8 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
// Check if the sign bits of source go down as far as the truncated value.
unsigned DstTyBits = DstTy.getScalarSizeInBits();
unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
- unsigned NumSrcSignBits = computeNumSignBitsImpl(Src, DemandedElts, Depth + 1);
+ unsigned NumSrcSignBits =
+ computeNumSignBitsImpl(Src, DemandedElts, Depth + 1);
if (NumSrcSignBits > (NumSrcBits - DstTyBits))
return NumSrcSignBits - (NumSrcBits - DstTyBits);
break;
@@ -2002,7 +2004,8 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
if (!DemandedSub)
continue;
- unsigned Tmp2 = computeNumSignBitsImpl(MO.getReg(), DemandedSub, Depth + 1);
+ unsigned Tmp2 =
+ computeNumSignBitsImpl(MO.getReg(), DemandedSub, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
@@ -2028,8 +2031,8 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
if (FirstAnswer == 1)
break;
if (!!DemandedRHS) {
- unsigned Tmp2 =
- computeNumSignBitsImpl(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
+ unsigned Tmp2 = computeNumSignBitsImpl(MI.getOperand(2).getReg(),
+ DemandedRHS, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
}
break;
@@ -2037,7 +2040,8 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
case TargetOpcode::G_SPLAT_VECTOR: {
// Check if the sign bits of source go down as far as the truncated value.
Register Src = MI.getOperand(1).getReg();
- unsigned NumSrcSignBits = computeNumSignBitsImpl(Src, APInt(1, 1), Depth + 1);
+ unsigned NumSrcSignBits =
+ computeNumSignBitsImpl(Src, APInt(1, 1), Depth + 1);
unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
if (NumSrcSignBits > (NumSrcBits - TyBits))
return NumSrcSignBits - (NumSrcBits - TyBits);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a7eafb94596ef..fd61b6e0ba92b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6139,13 +6139,16 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
case AMDGPU::G_AMDGPU_SMED3:
case AMDGPU::G_AMDGPU_UMED3: {
auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
- unsigned Tmp2 = Analysis.computeNumSignBitsImpl(Src2, DemandedElts, Depth + 1);
+ unsigned Tmp2 =
+ Analysis.computeNumSignBitsImpl(Src2, DemandedElts, Depth + 1);
if (Tmp2 == 1)
return 1;
- unsigned Tmp1 = Analysis.computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
+ unsigned Tmp1 =
+ Analysis.computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
if (Tmp1 == 1)
return 1;
- unsigned Tmp0 = Analysis.computeNumSignBitsImpl(Src0, DemandedElts, Depth + 1);
+ unsigned Tmp0 =
+ Analysis.computeNumSignBitsImpl(Src0, DemandedElts, Depth + 1);
if (Tmp0 == 1)
return 1;
return std::min({Tmp0, Tmp1, Tmp2});
>From 7e84ad1828ab13185450b1fb6d476e0adf1601f9 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyansh.chaturvedi at canonical.com>
Date: Sun, 7 Sep 2025 23:37:32 +0530
Subject: [PATCH 11/14] [GlobalISel] Replace getKnownBits with its Impl in
getValidShiftAmountRange
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 2712fd99173e7..39dcfebb624b1 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -2139,7 +2139,8 @@ std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
// Use computeKnownBits to find a hidden constant/knownbits (usually type
// legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
- KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
+ KnownBits KnownAmt;
+ computeKnownBitsImpl(R, KnownAmt, DemandedElts, Depth);
if (KnownAmt.getMaxValue().ult(BitWidth))
return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);
>From a1859460d161f13d54b6801bd2716096f9843081 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyanshchaturvedi18 at gmail.com>
Date: Fri, 19 Sep 2025 10:05:45 +0000
Subject: [PATCH 12/14] [GlobalISel] Remove workarounds for cache assertion
while adding G_ABS knownbits
---
.../CodeGen/GlobalISel/GISelValueTracking.h | 3 -
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 57 +++++++------------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 9 +--
3 files changed, 24 insertions(+), 45 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
index a9f107bf8c5e2..2db66ba9584a3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelValueTracking.h
@@ -67,9 +67,6 @@ class LLVM_ABI GISelValueTracking : public GISelChangeObserver {
void computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts, unsigned Depth = 0);
- virtual unsigned computeNumSignBitsImpl(Register R, const APInt &DemandedElts,
- unsigned Depth = 0);
-
unsigned computeNumSignBits(Register R, const APInt &DemandedElts,
unsigned Depth = 0);
unsigned computeNumSignBits(Register R, unsigned Depth = 0);
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 39dcfebb624b1..993c30d270804 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -680,7 +680,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
Known = Known.abs();
Known.Zero.setHighBits(
- computeNumSignBitsImpl(SrcReg, DemandedElts, Depth + 1) - 1);
+ computeNumSignBits(SrcReg, DemandedElts, Depth + 1) - 1);
break;
}
}
@@ -1731,11 +1731,10 @@ unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
const APInt &DemandedElts,
unsigned Depth) {
// Test src1 first, since we canonicalize simpler expressions to the RHS.
- unsigned Src1SignBits = computeNumSignBitsImpl(Src1, DemandedElts, Depth);
+ unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
if (Src1SignBits == 1)
return 1;
- return std::min(computeNumSignBitsImpl(Src0, DemandedElts, Depth),
- Src1SignBits);
+ return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}
/// Compute the known number of sign bits with attached range metadata in the
@@ -1765,9 +1764,9 @@ static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
CR.getSignedMax().getNumSignBits());
}
-unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
- const APInt &DemandedElts,
- unsigned Depth) {
+unsigned GISelValueTracking::computeNumSignBits(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth) {
MachineInstr &MI = *MRI.getVRegDef(R);
unsigned Opcode = MI.getOpcode();
@@ -1797,7 +1796,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
MRI.getType(Src.getReg()).isValid()) {
// Don't increment Depth for this one since we didn't do any work.
- return computeNumSignBitsImpl(Src.getReg(), DemandedElts, Depth);
+ return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
}
return 1;
@@ -1806,7 +1805,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
Register Src = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(Src);
unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
- return computeNumSignBitsImpl(Src, DemandedElts, Depth + 1) + Tmp;
+ return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
}
case TargetOpcode::G_ASSERT_SEXT:
case TargetOpcode::G_SEXT_INREG: {
@@ -1814,7 +1813,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
Register Src = MI.getOperand(1).getReg();
unsigned SrcBits = MI.getOperand(2).getImm();
unsigned InRegBits = TyBits - SrcBits + 1;
- return std::max(computeNumSignBitsImpl(Src, DemandedElts, Depth + 1),
+ return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
InRegBits);
}
case TargetOpcode::G_LOAD: {
@@ -1859,11 +1858,11 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
case TargetOpcode::G_XOR: {
Register Src1 = MI.getOperand(1).getReg();
unsigned Src1NumSignBits =
- computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
+ computeNumSignBits(Src1, DemandedElts, Depth + 1);
if (Src1NumSignBits != 1) {
Register Src2 = MI.getOperand(2).getReg();
unsigned Src2NumSignBits =
- computeNumSignBitsImpl(Src2, DemandedElts, Depth + 1);
+ computeNumSignBits(Src2, DemandedElts, Depth + 1);
FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
}
break;
@@ -1871,7 +1870,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
case TargetOpcode::G_ASHR: {
Register Src1 = MI.getOperand(1).getReg();
Register Src2 = MI.getOperand(2).getReg();
- FirstAnswer = computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
+ FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
break;
@@ -1921,8 +1920,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
// Check if the sign bits of source go down as far as the truncated value.
unsigned DstTyBits = DstTy.getScalarSizeInBits();
unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
- unsigned NumSrcSignBits =
- computeNumSignBitsImpl(Src, DemandedElts, Depth + 1);
+ unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
if (NumSrcSignBits > (NumSrcBits - DstTyBits))
return NumSrcSignBits - (NumSrcBits - DstTyBits);
break;
@@ -1982,7 +1980,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
continue;
unsigned Tmp2 =
- computeNumSignBitsImpl(MO.getReg(), SingleDemandedElt, Depth + 1);
+ computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
// If we don't know any bits, early out.
@@ -2004,8 +2002,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
if (!DemandedSub)
continue;
- unsigned Tmp2 =
- computeNumSignBitsImpl(MO.getReg(), DemandedSub, Depth + 1);
+ unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
@@ -2026,13 +2023,13 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
return 1;
if (!!DemandedLHS)
- FirstAnswer = computeNumSignBitsImpl(Src1, DemandedLHS, Depth + 1);
+ FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
// If we don't know anything, early out and try computeKnownBits fall-back.
if (FirstAnswer == 1)
break;
if (!!DemandedRHS) {
- unsigned Tmp2 = computeNumSignBitsImpl(MI.getOperand(2).getReg(),
- DemandedRHS, Depth + 1);
+ unsigned Tmp2 =
+ computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
}
break;
@@ -2040,8 +2037,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
case TargetOpcode::G_SPLAT_VECTOR: {
// Check if the sign bits of source go down as far as the truncated value.
Register Src = MI.getOperand(1).getReg();
- unsigned NumSrcSignBits =
- computeNumSignBitsImpl(Src, APInt(1, 1), Depth + 1);
+ unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
if (NumSrcSignBits > (NumSrcBits - TyBits))
return NumSrcSignBits - (NumSrcBits - TyBits);
@@ -2062,8 +2058,7 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
- KnownBits Known;
- computeKnownBitsImpl(R, Known, DemandedElts, Depth);
+ KnownBits Known = getKnownBits(R, DemandedElts, Depth);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
Mask = Known.Zero;
@@ -2080,15 +2075,6 @@ unsigned GISelValueTracking::computeNumSignBitsImpl(Register R,
return std::max(FirstAnswer, Mask.countl_one());
}
-unsigned GISelValueTracking::computeNumSignBits(Register R,
- const APInt &DemandedElts,
- unsigned Depth) {
- assert(ComputeKnownBitsCache.empty() && "Cache should be empty");
- unsigned NumSignBits = computeNumSignBitsImpl(R, DemandedElts, Depth);
- ComputeKnownBitsCache.clear();
- return NumSignBits;
-}
-
unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
LLT Ty = MRI.getType(R);
APInt DemandedElts =
@@ -2139,8 +2125,7 @@ std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
// Use computeKnownBits to find a hidden constant/knownbits (usually type
// legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
- KnownBits KnownAmt;
- computeKnownBitsImpl(R, KnownAmt, DemandedElts, Depth);
+ KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
if (KnownAmt.getMaxValue().ult(BitWidth))
return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fd61b6e0ba92b..f069b591eb315 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6139,16 +6139,13 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
case AMDGPU::G_AMDGPU_SMED3:
case AMDGPU::G_AMDGPU_UMED3: {
auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
- unsigned Tmp2 =
- Analysis.computeNumSignBitsImpl(Src2, DemandedElts, Depth + 1);
+ unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);
if (Tmp2 == 1)
return 1;
- unsigned Tmp1 =
- Analysis.computeNumSignBitsImpl(Src1, DemandedElts, Depth + 1);
+ unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);
if (Tmp1 == 1)
return 1;
- unsigned Tmp0 =
- Analysis.computeNumSignBitsImpl(Src0, DemandedElts, Depth + 1);
+ unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);
if (Tmp0 == 1)
return 1;
return std::min({Tmp0, Tmp1, Tmp2});
>From 66cf46f2fa2912ea41a67859d5ccbd9376b6ace5 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyanshchaturvedi18 at gmail.com>
Date: Fri, 19 Sep 2025 10:37:37 +0000
Subject: [PATCH 13/14] [GlobalISel] clang-format
---
llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 993c30d270804..3f6813e52a1cc 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -679,8 +679,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Register SrcReg = MI.getOperand(1).getReg();
computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
Known = Known.abs();
- Known.Zero.setHighBits(
- computeNumSignBits(SrcReg, DemandedElts, Depth + 1) - 1);
+ Known.Zero.setHighBits(computeNumSignBits(SrcReg, DemandedElts, Depth + 1) -
+ 1);
break;
}
}
@@ -1765,7 +1765,7 @@ static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
}
unsigned GISelValueTracking::computeNumSignBits(Register R,
- const APInt &DemandedElts,
+ const APInt &DemandedElts,
unsigned Depth) {
MachineInstr &MI = *MRI.getVRegDef(R);
unsigned Opcode = MI.getOpcode();
@@ -2028,8 +2028,8 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
if (FirstAnswer == 1)
break;
if (!!DemandedRHS) {
- unsigned Tmp2 =
- computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
+ unsigned Tmp2 =
+ computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
FirstAnswer = std::min(FirstAnswer, Tmp2);
}
break;
>From 7e0073dd33e6377f121751b49679be1b83528903 Mon Sep 17 00:00:00 2001
From: Pragyansh Chaturvedi <pragyanshchaturvedi18 at gmail.com>
Date: Sat, 20 Sep 2025 16:29:24 +0000
Subject: [PATCH 14/14] [GlobalISel] Update AMDGPU tests for G_ABS KnownBits
tracking
---
.../AMDGPU/GlobalISel/legalize-abs.mir | 20 ++++++-------------
.../CodeGen/AMDGPU/GlobalISel/llvm.abs.ll | 7 +------
2 files changed, 7 insertions(+), 20 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
index 73977eb640a48..8b19d7d11a86b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
@@ -302,11 +302,8 @@ body: |
; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]]
; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG1]]
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ABS]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ABS1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ABS1]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ABS]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
;
@@ -429,16 +426,11 @@ body: |
; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG2]]
; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
; SI-NEXT: [[ABS3:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG3]]
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ABS]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ABS1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ABS1]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ABS]], [[SHL]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ABS2]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ABS3]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ABS3]], [[C]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ABS2]], [[SHL1]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 800df89877036..02d0e521e3b00 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -459,8 +459,6 @@ define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_abs_i32 s1, s1
; GFX8-NEXT: s_abs_i32 s0, s0
-; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
-; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
@@ -548,12 +546,9 @@ define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
; GFX8-NEXT: s_abs_i32 s2, s2
; GFX8-NEXT: s_abs_i32 s0, s0
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX8-NEXT: s_abs_i32 s1, s1
-; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
+; GFX8-NEXT: s_abs_i32 s1, s1
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: abs_sgpr_v3i16:
More information about the llvm-commits
mailing list