[llvm] [AMDGPU] Compute GISel KnownBits for S_BFE instructions (PR #141588)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 06:45:15 PDT 2025
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141588
>From 0793c9530921fd8050950f569947f25b405d1774 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 27 May 2025 10:26:24 +0200
Subject: [PATCH 1/2] [AMDGPU] Compute GISel KnownBits for S_BFE instructions
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 45 +++++++++++++++++++++++
1 file changed, 45 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ade88a16193b8..1a1490b55d3e4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16502,12 +16502,57 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
}
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+ KnownBits &Known, const APInt &DemandedElts,
+ unsigned BFEWidth, bool SExt) {
+ const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+ const MachineOperand &Src1 = MI.getOperand(2);
+
+ unsigned Src1Cst = 0;
+ if (Src1.isImm())
+ Src1Cst = Src1.getImm();
+ else if (Src1.isReg()) {
+ auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+ if (!Cst)
+ return;
+ Src1Cst = Cst->Value.getZExtValue();
+ } else
+ return;
+
+ const unsigned Mask = maskTrailingOnes<unsigned>(6);
+ const unsigned Offset = Src1Cst & Mask;
+ const unsigned Width = (Src1Cst >> 16) & Mask;
+
+ VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
+
+ const uint64_t WidthMask = maskTrailingOnes<uint64_t>(Width);
+ Known.Zero = Known.Zero.shl(Offset) & WidthMask;
+ Known.One = Known.One.shl(Offset) & WidthMask;
+
+ if (SExt)
+ Known.sextInReg(Width);
+ else
+ Known.Zero |= maskLeadingOnes<unsigned>(BFEWidth - Width);
+}
+
void SITargetLowering::computeKnownBitsForTargetInstr(
GISelValueTracking &VT, Register R, KnownBits &Known,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
+ case AMDGPU::S_BFE_I32:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+ /*SExt=*/true);
+ case AMDGPU::S_BFE_U32:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+ /*SExt=*/false);
+ case AMDGPU::S_BFE_I64:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+ /*SExt=*/true);
+ case AMDGPU::S_BFE_U64:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+ /*SExt=*/false);
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();
>From a1992f6d933ea2d48cd76b27952c03bd5a6c9b76 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 27 May 2025 15:44:52 +0200
Subject: [PATCH 2/2] Fixes + add tests
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 +-
.../AMDGPU/GlobalISel/known-bits-sbfe.mir | 139 ++++++++++++++++++
.../update_givaluetracking_test_checks.py | 2 +-
3 files changed, 147 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1a1490b55d3e4..f10fc1f982a74 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16525,20 +16525,22 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
- const uint64_t WidthMask = maskTrailingOnes<uint64_t>(Width);
- Known.Zero = Known.Zero.shl(Offset) & WidthMask;
- Known.One = Known.One.shl(Offset) & WidthMask;
+ Known.Zero = Known.Zero.lshr(Offset);
+ Known.One = Known.One.lshr(Offset);
+
+ Known = Known.trunc(Width);
if (SExt)
- Known.sextInReg(Width);
+ Known = Known.sext(BFEWidth);
else
- Known.Zero |= maskLeadingOnes<unsigned>(BFEWidth - Width);
+ Known = Known.zext(BFEWidth);
}
void SITargetLowering::computeKnownBitsForTargetInstr(
GISelValueTracking &VT, Register R, KnownBits &Known,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const {
+ Known.resetAll();
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
case AMDGPU::S_BFE_I32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
new file mode 100644
index 0000000000000..85f1f66c624d9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
@@ -0,0 +1,139 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes="print<gisel-value-tracking>" %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+name: test_s_bfe_u32_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_u32_constants
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+ %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+ %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_i32_constants
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+ %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+ %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_u64_constants
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+ %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+ %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_i64_constants
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+ %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+ %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract bits [8:16); the middle 4 bits of the extracted field (source bits [10:14)) are unknown.
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+ ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+ ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+ ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24
+ %input:sgpr_32(s32) = COPY $sgpr0
+ %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+ %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+ %masked_input:sgpr_32(s32) = G_AND %input, %mask
+ %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+ %bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract bits [8:16); the middle 4 bits of the extracted field (source bits [10:14)) are unknown.
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+ ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+ ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+ ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26
+ %input:sgpr_32(s32) = COPY $sgpr0
+ %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+ %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+ %masked_input:sgpr_32(s32) = G_AND %input, %mask
+ %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+ %bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract bits [8:16); the middle 4 bits of the extracted field (source bits [10:14)) are unknown.
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+ ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+ ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+ ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56
+ %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+ %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+ %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+ %masked_input:sgpr_64(s64) = G_AND %input, %mask
+ %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+ %bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract bits [8:16); the middle 4 bits of the extracted field (source bits [10:14)) are unknown.
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+ ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+ ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+ ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58
+ %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+ %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+ %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+ %masked_input:sgpr_64(s64) = G_AND %input, %mask
+ %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+ %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
diff --git a/llvm/utils/update_givaluetracking_test_checks.py b/llvm/utils/update_givaluetracking_test_checks.py
index 3a8f5395e8eb3..49b068ac7bef0 100755
--- a/llvm/utils/update_givaluetracking_test_checks.py
+++ b/llvm/utils/update_givaluetracking_test_checks.py
@@ -22,7 +22,7 @@
VT_FUNCTION_RE = re.compile(
r"\s*name:\s*@(?P<func>[A-Za-z0-9_-]+)"
- r"(?P<body>(\s*%[0-9a-zA-Z_]+:_\s*KnownBits:[01?]+\sSignBits:[0-9]+$)+)",
+ r"(?P<body>(\s*%[0-9a-zA-Z_]+:[A-Za-z0-9_-]+\s*KnownBits:[01?]+\sSignBits:[0-9]+$)+)",
flags=(re.X | re.M),
)
More information about the llvm-commits
mailing list