[llvm] 7d52b72 - [AMDGPU] Compute GISel KnownBits for S_BFE instructions (#141588)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 16 00:56:49 PDT 2025


Author: Pierre van Houtryve
Date: 2025-07-16T09:56:45+02:00
New Revision: 7d52b72239caba92a48e2cacbe5758e18efaed1c

URL: https://github.com/llvm/llvm-project/commit/7d52b72239caba92a48e2cacbe5758e18efaed1c
DIFF: https://github.com/llvm/llvm-project/commit/7d52b72239caba92a48e2cacbe5758e18efaed1c.diff

LOG: [AMDGPU] Compute GISel KnownBits for S_BFE instructions (#141588)

Next patches in the stack will emit them in the RegBankCombiner. With this, S_BFE instructions will hopefully interfere less with optimizations.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e449e1ab6cb20..dfe6f65d240e6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16910,12 +16910,63 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
   Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
 }
 
+// Known bits for S_BFE_{I,U}{32,64}: operand 1 is the data, operand 2 packs
+// the field offset/width and must be a (possibly looked-through) constant.
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+                             KnownBits &Known, const APInt &DemandedElts,
+                             unsigned BFEWidth, bool SExt, unsigned Depth) {
+  const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  unsigned Src1Cst = 0;
+  if (Src1.isImm()) {
+    Src1Cst = Src1.getImm();
+  } else if (Src1.isReg()) {
+    auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+    if (!Cst)
+      return;
+    Src1Cst = Cst->Value.getZExtValue();
+  } else {
+    return;
+  }
+
+  // Offset is at bits [4:0] for 32 bit, [5:0] for 64 bit.
+  // Width is always [22:16] (7 bits, so that 64 is representable).
+  const unsigned Offset =
+      Src1Cst & maskTrailingOnes<unsigned>((BFEWidth == 32) ? 5 : 6);
+  const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes<unsigned>(7);
+
+  // Leave Known untouched for empty, full-width or out-of-range fields: the
+  // extractBits/sext calls below need 0 < Width and Offset + Width <= BFEWidth.
+  if (Width == 0 || Width >= BFEWidth || Offset + Width > BFEWidth)
+    return;
+
+  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+                          Depth + 1);
+  // Narrow to the extracted field, then widen back to the result width.
+  Known = Known.extractBits(Width, Offset);
+  Known = SExt ? Known.sext(BFEWidth) : Known.zext(BFEWidth);
+}
+
 void SITargetLowering::computeKnownBitsForTargetInstr(
     GISelValueTracking &VT, Register R, KnownBits &Known,
     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
     unsigned Depth) const {
+  Known.resetAll();
   const MachineInstr *MI = MRI.getVRegDef(R);
   switch (MI->getOpcode()) {
+  case AMDGPU::S_BFE_I32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+                            /*SExt=*/true, Depth);
+  case AMDGPU::S_BFE_U32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+                            /*SExt=*/false, Depth);
+  case AMDGPU::S_BFE_I64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+                            /*SExt=*/true, Depth);
+  case AMDGPU::S_BFE_U64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+                            /*SExt=*/false, Depth);
   case AMDGPU::G_INTRINSIC:
   case AMDGPU::G_INTRINSIC_CONVERGENT: {
     Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
new file mode 100644
index 0000000000000..7a6e07ddf2290
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
@@ -0,0 +1,253 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes='print<gisel-value-tracking>' %s -filetype=null 2>&1 | FileCheck %s
+
+---
+name: test_s_bfe_u32_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u32_constants
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i32_constants
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u64_constants
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+    %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i64_constants
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+    %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+    %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0
+
+  ; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+  ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+  ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+  ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24
+    %input:sgpr_32(s32) = COPY $sgpr0
+    %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+    %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+    %masked_input:sgpr_32(s32) = G_AND %input, %mask
+    %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+    %bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0
+
+  ; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+  ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+  ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+  ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26
+    %input:sgpr_32(s32) = COPY $sgpr0
+    %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+    %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+    %masked_input:sgpr_32(s32) = G_AND %input, %mask
+    %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+    %bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0_sgpr1
+
+  ; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+  ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+  ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+  ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56
+    %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+    %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+    %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+    %masked_input:sgpr_64(s64) = G_AND %input, %mask
+    %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+    %bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0_sgpr1
+
+  ; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+  ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+  ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+  ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58
+    %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+    %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+    %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+    %masked_input:sgpr_64(s64) = G_AND %input, %mask
+    %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+    %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_g_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants
+  ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+    %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+    %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants
+  ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+    %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_g_constants_lookthrough
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants_lookthrough
+  ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+  ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+    %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1_ext, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants_lookthrough
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants_lookthrough
+  ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+  ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_trash_bits
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; Check that the 6th bit is ignored for u32. The lower 6 bits are
+  ; 101100 but we should mask out the first 1 for the 32 bit version.
+  ; CHECK-LABEL: name: @test_s_bfe_u32_trash_bits
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262252, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_trash_bits
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; Check that the 6th bit is ignored for i32. The lower 6 bits are
+  ; 101100 but we should mask out the first 1 for the 32 bit version.
+  ; CHECK-LABEL: name: @test_s_bfe_i32_trash_bits
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262252, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants_sixth_bit
+body: |
+  bb.0:
+  ; Extract [32:48)
+  ; Check we correctly read 6 bits for the offset on 64 bit BFEs.
+  ; CHECK-LABEL: name: @test_s_bfe_u64_constants_sixth_bit
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+    %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+    %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 1048608, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants_sixth_bit
+body: |
+  bb.0:
+  ; Extract [32:48)
+  ; Check we correctly read 6 bits for the offset on 64 bit BFEs.
+  ; CHECK-LABEL: name: @test_s_bfe_i64_constants_sixth_bit
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+    %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+    %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 1048608, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...


        


More information about the llvm-commits mailing list