[llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for bitfield extract (PR #132381)

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 27 08:27:20 PDT 2025


================
@@ -127,6 +131,117 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
   MI.eraseFromParent();
 }
 
+bool isSignedBFE(MachineInstr &MI) { // True for signed BFE, false for unsigned.
+  unsigned Opc = // Intrinsic ID (operand 1) for a GIntrinsic, else the opcode.
+      isa<GIntrinsic>(MI) ? MI.getOperand(1).getIntrinsicID() : MI.getOpcode();
+
+  switch (Opc) {
+  case AMDGPU::G_SBFX:
+  case Intrinsic::amdgcn_sbfe:
+    return true;
+  case AMDGPU::G_UBFX:
+  case Intrinsic::amdgcn_ubfe:
+    return false;
+  default: // Anything other than the four cases above is treated as a bug.
+    llvm_unreachable("Opcode not supported");
+  }
+}
+
+void RegBankLegalizeHelper::lowerDiv_BFE(MachineInstr &MI) { // Erases MI.
+  Register Dst = MI.getOperand(0).getReg();
+  assert(MRI.getType(Dst) == LLT::scalar(64)); // Only s64 results handled here.
+  bool Signed = isSignedBFE(MI);
+  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1; // Skip the intrinsic ID.
+  // Extract a bitfield from Src: LSBit is the least-significant bit of the
+  // field (field offset) and Width is the size of the bitfield in bits.
+  Register Src = MI.getOperand(FirstOpnd).getReg();
+  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
+  Register Width = MI.getOperand(FirstOpnd + 2).getReg();
+  // Diagrams below show the signed case; unsigned is analogous. x is the sign
+  // bit of Src, s is the field's sign bit, l its LSB, y the other field bits.
+
+  // Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
+  unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
+  auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
+
+  auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);
+
+  // Width not a known constant: Src >> LSBit << (64 - Width) >> (64 - Width)
+  // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
+  // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
+  if (!ConstWidth) {
+    auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
+    auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
+    B.buildInstr(SHROpc, {Dst}, {SignBit, Amt}); // ASHR or LSHR, as above.
+    MI.eraseFromParent();
+    return;
+  }
+
+  auto WidthImm = ConstWidth->Value.getZExtValue(); // Constant Width path.
+  auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
+  Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
+  Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
+  auto Zero = B.buildConstant({VgprRB, S32}, 0); // BFX offset; unsigned Hi.
+  unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
+
+  if (WidthImm <= 32) {
+    // Field fits in Lo. SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
+    auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
+    MachineInstrBuilder Hi;
+    if (Signed) {
+      // Hi = Lo >> 31. SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
+      Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
+    } else {
+      // Hi is zero. SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
+      Hi = Zero;
+    }
+    B.buildMergeLikeInstr(Dst, {Lo, Hi});
+  } else {
+    auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32); // Field bits in Hi.
+    // Lo is kept as is. SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
+    auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
+    B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
+  }
+
+  MI.eraseFromParent();
+  return;
+}
+
+void RegBankLegalizeHelper::lowerUni_BFE(MachineInstr &MI) {
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(DstReg);
+  bool Signed = isSignedBFE(MI);
+  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
+  Register Src = MI.getOperand(FirstOpnd).getReg();
+  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
+  Register Width = MI.getOperand(FirstOpnd + 2).getReg();
+  // For uniform bit field extract there are 4 available instructions, but
+  // LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
+  // field offset in low and size in high 16 bits.
+
+  // Src1 Hi16|Lo16 = Size|FieldOffset
+  auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
+  auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
+  auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
+  auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
+  unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+  unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
----------------
petar-avramovic wrote:

There are no tablegen patterns for S_BFE, and known-bits analysis for GlobalISel is missing support for many generic opcodes, not to mention target intrinsics. Can we leave that for another patch?

https://github.com/llvm/llvm-project/pull/132381


More information about the llvm-commits mailing list