[llvm] 7d52b72 - [AMDGPU] Compute GISel KnownBits for S_BFE instructions (#141588)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 00:56:49 PDT 2025
Author: Pierre van Houtryve
Date: 2025-07-16T09:56:45+02:00
New Revision: 7d52b72239caba92a48e2cacbe5758e18efaed1c
URL: https://github.com/llvm/llvm-project/commit/7d52b72239caba92a48e2cacbe5758e18efaed1c
DIFF: https://github.com/llvm/llvm-project/commit/7d52b72239caba92a48e2cacbe5758e18efaed1c.diff
LOG: [AMDGPU] Compute GISel KnownBits for S_BFE instructions (#141588)
Next patches in the stack will emit them in the RegBankCombiner. With this, S_BFE instructions will hopefully interfere less with optimizations.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e449e1ab6cb20..dfe6f65d240e6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16910,12 +16910,63 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
}
+/// Compute the known bits of the result of a scalar bitfield extract
+/// (S_BFE_I32 / S_BFE_U32 / S_BFE_I64 / S_BFE_U64).
+///
+/// \p BFEWidth is the bit width of the instruction's result (32 or 64) and
+/// \p SExt selects whether the extracted field is sign-extended (true) or
+/// zero-extended (false) to that width. \p Known is only written once the
+/// offset/width control word (operand 2) is known to be a constant that
+/// describes a field lying entirely inside the source; on every early return
+/// it is left exactly as the caller passed it in.
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+                             KnownBits &Known, const APInt &DemandedElts,
+                             unsigned BFEWidth, bool SExt, unsigned Depth) {
+  const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  // The control word must be a constant: either an immediate operand or a
+  // register that getIConstantVRegValWithLookThrough can resolve.
+  unsigned Src1Cst = 0;
+  if (Src1.isImm()) {
+    Src1Cst = Src1.getImm();
+  } else if (Src1.isReg()) {
+    auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+    if (!Cst)
+      return;
+    Src1Cst = Cst->Value.getZExtValue();
+  } else {
+    return;
+  }
+
+  // Offset is at bits [4:0] for 32 bit, [5:0] for 64 bit.
+  // Width is always [22:16], i.e. a 7 bit field. All 7 bits must be read:
+  // dropping bit 22 would make an over-wide request (e.g. 64) look like a
+  // width of 0 and slip past the ill-formed check below.
+  const unsigned Offset =
+      Src1Cst & maskTrailingOnes<unsigned>((BFEWidth == 32) ? 5 : 6);
+  const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes<unsigned>(7);
+
+  if (Width >= BFEWidth) // Ill-formed.
+    return;
+
+  // extractBits requires the field to lie entirely within the source value.
+  // Stay conservative when the requested field runs past the top bit.
+  if (Offset + Width > BFEWidth)
+    return;
+
+  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+                          Depth + 1);
+
+  Known = Known.extractBits(Width, Offset);
+
+  if (SExt)
+    Known = Known.sext(BFEWidth);
+  else
+    Known = Known.zext(BFEWidth);
+}
+
void SITargetLowering::computeKnownBitsForTargetInstr(
GISelValueTracking &VT, Register R, KnownBits &Known,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const {
+ Known.resetAll();
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
+ case AMDGPU::S_BFE_I32:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+ /*SExt=*/true, Depth);
+ case AMDGPU::S_BFE_U32:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+ /*SExt=*/false, Depth);
+ case AMDGPU::S_BFE_I64:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+ /*SExt=*/true, Depth);
+ case AMDGPU::S_BFE_U64:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+ /*SExt=*/false, Depth);
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
new file mode 100644
index 0000000000000..7a6e07ddf2290
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
@@ -0,0 +1,253 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes='print<gisel-value-tracking>' %s -filetype=null 2>&1 | FileCheck %s
+
+---
+name: test_s_bfe_u32_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_u32_constants
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+ %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+ %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_i32_constants
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+ %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+ %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_u64_constants
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+ %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+ %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_i64_constants
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+ %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+ %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract [8:16) but the middle 4 bits are ????
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+ ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+ ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+ ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24
+ %input:sgpr_32(s32) = COPY $sgpr0
+ %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+ %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+ %masked_input:sgpr_32(s32) = G_AND %input, %mask
+ %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+ %bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract [8:16) but the middle 4 bits are ????
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+ ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+ ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+ ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26
+ %input:sgpr_32(s32) = COPY $sgpr0
+ %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+ %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+ %masked_input:sgpr_32(s32) = G_AND %input, %mask
+ %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+ %bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract [8:16) but the middle 4 bits are ????
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+ ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+ ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+ ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56
+ %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+ %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+ %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+ %masked_input:sgpr_64(s64) = G_AND %input, %mask
+ %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+ %bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_middle_bits_unknown
+body: |
+ bb.0:
+ ; Extract [8:16) but the middle 4 bits are ????
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown
+ ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+ ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+ ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+ ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58
+ %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+ %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+ %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+ %masked_input:sgpr_64(s64) = G_AND %input, %mask
+ %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+ %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_g_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants
+ ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+ %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+ %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+ %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants
+ ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+ ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+ %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+ %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+ %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_g_constants_lookthrough
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants_lookthrough
+ ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+ ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+ %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+ %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+ %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+ %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1_ext, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants_lookthrough
+body: |
+ bb.0:
+ ; Extract [12:16); src1 is an s24 constant reached through a G_ZEXT.
+ ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants_lookthrough
+ ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+ ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+ ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+ %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+ %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+ %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+ %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1_ext, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_trash_bits
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; Check that the 6th bit is ignored for u32. The lower 6 bits are
+ ; 101100 but we should mask out the first 1 for the 32 bit version.
+ ; CHECK-LABEL: name: @test_s_bfe_u32_trash_bits
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+ %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+ %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262252, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_trash_bits
+body: |
+ bb.0:
+ ; Extract [12:16)
+ ; Check that the 6th bit is ignored for i32. The lower 6 bits are
+ ; 101100 but we should mask out the first 1 for the 32 bit version.
+ ; CHECK-LABEL: name: @test_s_bfe_i32_trash_bits
+ ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+ %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+ %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262252, implicit-def $scc
+ $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants_sixth_bit
+body: |
+ bb.0:
+ ; Extract [32:48)
+ ; Check we correctly read all 6 bits of the offset on 64 bit BFEs (offset = 32).
+ ; CHECK-LABEL: name: @test_s_bfe_u64_constants_sixth_bit
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+ %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+ %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 1048608, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants_sixth_bit
+body: |
+ bb.0:
+ ; Extract [32:48)
+ ; Check we correctly read all 6 bits of the offset on 64 bit BFEs (offset = 32).
+ ; CHECK-LABEL: name: @test_s_bfe_i64_constants_sixth_bit
+ ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+ ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+ %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+ %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 1048608, implicit-def $scc
+ $sgpr0_sgpr1 = COPY %bfe
+...
More information about the llvm-commits
mailing list