[llvm] [AMDGPU] Compute GISel KnownBits for S_BFE instructions (PR #141588)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Wed May 28 02:17:35 PDT 2025


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141588

From 2e18e5dc22e402d563a2cca2a8f764647f77634b Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 27 May 2025 10:26:24 +0200
Subject: [PATCH 1/4] [AMDGPU] Compute GISel KnownBits for S_BFE instructions

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 45 +++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ade88a16193b8..1a1490b55d3e4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16502,12 +16502,57 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
   Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
 }
 
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+                             KnownBits &Known, const APInt &DemandedElts,
+                             unsigned BFEWidth, bool SExt) {
+  const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  unsigned Src1Cst = 0;
+  if (Src1.isImm())
+    Src1Cst = Src1.getImm();
+  else if (Src1.isReg()) {
+    auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+    if (!Cst)
+      return;
+    Src1Cst = Cst->Value.getZExtValue();
+  } else
+    return;
+
+  const unsigned Mask = maskTrailingOnes<unsigned>(6);
+  const unsigned Offset = Src1Cst & Mask;
+  const unsigned Width = (Src1Cst >> 16) & Mask;
+
+  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
+
+  const uint64_t WidthMask = maskTrailingOnes<uint64_t>(Width);
+  Known.Zero = Known.Zero.shl(Offset) & WidthMask;
+  Known.One = Known.One.shl(Offset) & WidthMask;
+
+  if (SExt)
+    Known.sextInReg(Width);
+  else
+    Known.Zero |= maskLeadingOnes<unsigned>(BFEWidth - Width);
+}
+
 void SITargetLowering::computeKnownBitsForTargetInstr(
     GISelValueTracking &VT, Register R, KnownBits &Known,
     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
     unsigned Depth) const {
   const MachineInstr *MI = MRI.getVRegDef(R);
   switch (MI->getOpcode()) {
+  case AMDGPU::S_BFE_I32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+                            /*SExt=*/true);
+  case AMDGPU::S_BFE_U32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+                            /*SExt=*/false);
+  case AMDGPU::S_BFE_I64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+                            /*SExt=*/true);
+  case AMDGPU::S_BFE_U64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+                            /*SExt=*/false);
   case AMDGPU::G_INTRINSIC:
   case AMDGPU::G_INTRINSIC_CONVERGENT: {
     Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();

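As a side note (not part of the patch): the packed src1 operand of S_BFE carries the bit offset in its low bits and the field width at bits [22:16]. A minimal standalone sketch of the decoding the helper above performs, using the src1 constant from the tests added later in this series; the helper name is illustrative only.

  #include <cstdint>
  #include <cstdio>

  // Decode an S_BFE src1 constant into (offset, width), mirroring the
  // helper above: offset in the low bits, width at bits [22:16].
  static void decodeBFESrc1(uint32_t Src1Cst, unsigned &Offset, unsigned &Width) {
    Offset = Src1Cst & 0x3F;        // low 6 bits
    Width = (Src1Cst >> 16) & 0x3F; // 6-bit width field
  }

  int main() {
    unsigned Offset = 0, Width = 0;
    decodeBFESrc1(262156 /*0x4000C*/, Offset, Width);
    // Prints: offset=12 width=4, i.e. extract bits [12:16).
    std::printf("offset=%u width=%u\n", Offset, Width);
    return 0;
  }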
From 2fae5d5b3f1a20304d2b762222bea2d97cbd22f2 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 27 May 2025 15:44:52 +0200
Subject: [PATCH 2/4] Fixes + add tests

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  12 +-
 .../AMDGPU/GlobalISel/known-bits-sbfe.mir     | 139 ++++++++++++++++++
 2 files changed, 146 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1a1490b55d3e4..f10fc1f982a74 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16525,20 +16525,22 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
 
   VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
 
-  const uint64_t WidthMask = maskTrailingOnes<uint64_t>(Width);
-  Known.Zero = Known.Zero.shl(Offset) & WidthMask;
-  Known.One = Known.One.shl(Offset) & WidthMask;
+  Known.Zero = Known.Zero.lshr(Offset);
+  Known.One = Known.One.lshr(Offset);
+
+  Known = Known.trunc(Width);
 
   if (SExt)
-    Known.sextInReg(Width);
+    Known = Known.sext(BFEWidth);
   else
-    Known.Zero |= maskLeadingOnes<unsigned>(BFEWidth - Width);
+    Known = Known.zext(BFEWidth);
 }
 
 void SITargetLowering::computeKnownBitsForTargetInstr(
     GISelValueTracking &VT, Register R, KnownBits &Known,
     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
     unsigned Depth) const {
+  Known.resetAll();
   const MachineInstr *MI = MRI.getVRegDef(R);
   switch (MI->getOpcode()) {
   case AMDGPU::S_BFE_I32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
new file mode 100644
index 0000000000000..85f1f66c624d9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
@@ -0,0 +1,139 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes="print<gisel-value-tracking>" %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+name: test_s_bfe_u32_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u32_constants
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i32_constants
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u64_constants
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+    %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i64_constants
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+    %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+    %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0
+
+  ; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+  ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+  ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+  ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24
+    %input:sgpr_32(s32) = COPY $sgpr0
+    %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+    %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+    %masked_input:sgpr_32(s32) = G_AND %input, %mask
+    %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+    %bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0
+
+  ; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+  ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+  ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+  ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26
+    %input:sgpr_32(s32) = COPY $sgpr0
+    %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+    %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+    %masked_input:sgpr_32(s32) = G_AND %input, %mask
+    %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+    %bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0_sgpr1
+
+  ; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+  ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+  ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+  ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56
+    %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+    %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+    %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+    %masked_input:sgpr_64(s64) = G_AND %input, %mask
+    %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+    %bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_middle_bits_unknown
+body: |
+  bb.0:
+  ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0_sgpr1
+
+  ; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown
+  ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+  ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+  ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+  ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58
+    %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+    %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+    %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+    %masked_input:sgpr_64(s64) = G_AND %input, %mask
+    %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+    %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...

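The reworked arithmetic in this revision reads more directly as: shift the source's known bits right by the offset, truncate to the extracted width, then sign- or zero-extend back to the BFE width. A standalone restatement with llvm::KnownBits (the helper name is illustrative, not part of the patch):

  #include "llvm/Support/KnownBits.h"

  // Sketch of the transform above on a plain KnownBits value.
  static llvm::KnownBits bfeKnownBits(llvm::KnownBits Known, unsigned Offset,
                                      unsigned Width, unsigned BFEWidth,
                                      bool SExt) {
    // Drop the bits below the extracted field.
    Known.Zero = Known.Zero.lshr(Offset);
    Known.One = Known.One.lshr(Offset);
    // Keep only the Width bits of the field...
    Known = Known.trunc(Width);
    // ...then extend back to the full destination width.
    return SExt ? Known.sext(BFEWidth) : Known.zext(BFEWidth);
  }

  // For example, a fully known 0xFFFF source with Offset=12, Width=4 and
  // SExt=true yields an all-ones result, matching test_s_bfe_i32_constants.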
From 7a7b474628910c7aef9a201c9591217b0a693402 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 28 May 2025 10:31:04 +0200
Subject: [PATCH 3/4] Address comment and add more tests

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  15 ++-
 .../AMDGPU/GlobalISel/known-bits-sbfe.mir     | 116 +++++++++++++++++-
 2 files changed, 124 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f10fc1f982a74..83fc54b08fc0c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16509,19 +16509,22 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
   const MachineOperand &Src1 = MI.getOperand(2);
 
   unsigned Src1Cst = 0;
-  if (Src1.isImm())
+  if (Src1.isImm()) {
     Src1Cst = Src1.getImm();
-  else if (Src1.isReg()) {
+  } else if (Src1.isReg()) {
     auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
     if (!Cst)
       return;
     Src1Cst = Cst->Value.getZExtValue();
-  } else
+  } else {
     return;
+  }
 
-  const unsigned Mask = maskTrailingOnes<unsigned>(6);
-  const unsigned Offset = Src1Cst & Mask;
-  const unsigned Width = (Src1Cst >> 16) & Mask;
+  // Offset is at bits [4:0] for 32 bit, [5:0] for 64 bit.
+  // Width is always [22:16].
+  const unsigned Offset =
+      Src1Cst & maskTrailingOnes<unsigned>((BFEWidth == 32) ? 5 : 6);
+  const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes<unsigned>(6);
 
   VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
index 85f1f66c624d9..7a6e07ddf2290 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes="print<gisel-value-tracking>" %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes='print<gisel-value-tracking>' %s -filetype=null 2>&1 | FileCheck %s
 
 ---
 name: test_s_bfe_u32_constants
@@ -137,3 +137,117 @@ body: |
     %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc
     $sgpr0_sgpr1 = COPY %bfe
 ...
+---
+name: test_s_bfe_i32_g_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants
+  ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+    %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+    %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants
+  ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+    %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_g_constants_lookthrough
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants_lookthrough
+  ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+  ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+    %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1_ext, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants_lookthrough
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants_lookthrough
+  ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+  ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+  ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_trash_bits
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; Check that the 6th bit is ignored for u32. The lower 6 bits are
+  ; 101100 but we should mask out the first 1 for the 32 bit version.
+  ; CHECK-LABEL: name: @test_s_bfe_u32_trash_bits
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262252, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_trash_bits
+body: |
+  bb.0:
+  ; Extract [12:16)
+  ; Check that the 6th bit is ignored for i32. The lower 6 bits are
+  ; 101100 but we should mask out the first 1 for the 32 bit version.
+  ; CHECK-LABEL: name: @test_s_bfe_i32_trash_bits
+  ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262252, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants_sixth_bit
+body: |
+  bb.0:
+  ; Extract [32:48)
+  ; Check we correctly read 6 bits for the width on 64 bit BFEs.
+  ; CHECK-LABEL: name: @test_s_bfe_u64_constants_sixth_bit
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+    %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+    %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 1048608, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants_sixth_bit
+body: |
+  bb.0:
+  ; Extract [32:48)
+  ; Check we correctly read 6 bits for the width on 64 bit BFEs.
+  ; CHECK-LABEL: name: @test_s_bfe_i64_constants_sixth_bit
+  ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+  ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+    %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+    %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 1048608, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...

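One detail the new tests pin down: the offset field is read as 5 bits for the 32-bit BFEs and 6 bits for the 64-bit ones, so bit 5 of the offset is ignored in the 32-bit forms. A quick standalone check of the constants used in the trash-bits and sixth-bit tests (illustrative only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // Offset decoding per this revision: bits [4:0] for 32-bit BFEs,
  // bits [5:0] for 64-bit BFEs.
  static unsigned bfeOffset(uint32_t Src1Cst, unsigned BFEWidth) {
    return Src1Cst & ((BFEWidth == 32) ? 0x1Fu : 0x3Fu);
  }

  int main() {
    // 262252 = 0x4006C: the low 6 bits are 0b101100, but the 32-bit form
    // ignores bit 5, so the offset is still 12 (the *_trash_bits tests).
    assert(bfeOffset(262252, 32) == 12);
    // 1048608 = 0x100020: the 64-bit form reads all 6 bits, giving
    // offset 32 (the *_sixth_bit tests, which extract [32:48)).
    assert(bfeOffset(1048608, 64) == 32);
    return 0;
  }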
From 83cc936991d48c1a7e8d80b56370a2d87773c03e Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 28 May 2025 11:13:54 +0200
Subject: [PATCH 4/4] Propagate depth correctly

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 83fc54b08fc0c..f10dc27223593 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16504,7 +16504,7 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
 
 static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
                              KnownBits &Known, const APInt &DemandedElts,
-                             unsigned BFEWidth, bool SExt) {
+                             unsigned BFEWidth, bool SExt, unsigned Depth) {
   const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
   const MachineOperand &Src1 = MI.getOperand(2);
 
@@ -16526,7 +16526,8 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
       Src1Cst & maskTrailingOnes<unsigned>((BFEWidth == 32) ? 5 : 6);
   const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes<unsigned>(6);
 
-  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
+  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+                          Depth + 1);
 
   Known.Zero = Known.Zero.lshr(Offset);
   Known.One = Known.One.lshr(Offset);
@@ -16548,16 +16549,16 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
   switch (MI->getOpcode()) {
   case AMDGPU::S_BFE_I32:
     return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
-                            /*SExt=*/true);
+                            /*SExt=*/true, Depth);
   case AMDGPU::S_BFE_U32:
     return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
-                            /*SExt=*/false);
+                            /*SExt=*/false, Depth);
   case AMDGPU::S_BFE_I64:
     return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
-                            /*SExt=*/true);
+                            /*SExt=*/true, Depth);
   case AMDGPU::S_BFE_U64:
     return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
-                            /*SExt=*/false);
+                            /*SExt=*/false, Depth);
   case AMDGPU::G_INTRINSIC:
   case AMDGPU::G_INTRINSIC_CONVERGENT: {
     Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();

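The last revision threads Depth through to computeKnownBitsImpl. Value-tracking style analyses bound their recursion with a depth counter, so forwarding Depth + 1 (rather than restarting at 0 or passing Depth unchanged) keeps the overall walk bounded. A toy illustration of the pattern; the limit of 6 here is an assumption for the sketch, not taken from the patch:

  #include <cstdio>

  static constexpr unsigned MaxDepth = 6; // assumed limit, for illustration only

  // Toy recursive analysis: each query into an operand passes Depth + 1,
  // so a long use-def chain eventually hits the limit and gives up.
  static void analyze(unsigned Node, unsigned Depth) {
    if (Depth >= MaxDepth) {
      std::printf("node %u: depth limit reached, treating as unknown\n", Node);
      return;
    }
    std::printf("node %u: analyzing at depth %u\n", Node, Depth);
    if (Node > 0)
      analyze(Node - 1, Depth + 1); // as in this patch: forward Depth + 1
  }

  int main() {
    analyze(10, 0);
    return 0;
  }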

