[llvm] AMDGPU/GlobalISel: add RegBankLegalize rules for bitfield extract (PR #132381)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 27 08:27:20 PDT 2025
================
@@ -127,6 +131,117 @@ void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
MI.eraseFromParent();
}
+// Classifies a bitfield-extract instruction by signedness. For a GIntrinsic
+// the intrinsic ID (operand 1) is inspected, otherwise the generic opcode.
+// Returns true for G_SBFX / amdgcn_sbfe and false for G_UBFX / amdgcn_ubfe;
+// any other opcode is treated as unreachable.
+bool isSignedBFE(MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
+  if (isa<GIntrinsic>(MI))
+    Opcode = MI.getOperand(1).getIntrinsicID();
+
+  if (Opcode == AMDGPU::G_SBFX || Opcode == Intrinsic::amdgcn_sbfe)
+    return true;
+  if (Opcode == AMDGPU::G_UBFX || Opcode == Intrinsic::amdgcn_ubfe)
+    return false;
+
+  llvm_unreachable("Opcode not supported");
+}
+
+void RegBankLegalizeHelper::lowerDiv_BFE(MachineInstr &MI) { // Expand a 64-bit bitfield extract into shifts and/or 32-bit BFX.
+ Register Dst = MI.getOperand(0).getReg();
+ assert(MRI.getType(Dst) == LLT::scalar(64)); // Only a 64-bit destination is handled here.
+ bool Signed = isSignedBFE(MI);
+ unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1; // Intrinsic form carries the intrinsic ID at operand 1, so sources start at 2.
+ // Extract a bitfield from Src: LSBit is the least-significant bit of the
+ // field (the field offset) and Width is the size of the bitfield in bits.
+ Register Src = MI.getOperand(FirstOpnd).getReg();
+ Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
+ Register Width = MI.getOperand(FirstOpnd + 2).getReg();
+ // The bit diagrams below describe the signed extract; the unsigned case is
+ // analogous. x is the sign bit of Src, s is the sign bit of the field, l is
+ // the field's LSB and y are the remaining field bits. ? bits are don't-care.
+
+ // Step 1, Src >> LSBit. Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
+ unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR; // Arithmetic shift when signed, logical otherwise.
+ auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
+
+ auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI); // Empty when Width is not a known constant (tested below).
+
+ // Non-constant Width: expand to Src >> LSBit << (64 - Width) >> (64 - Width)
+ // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
+ // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
+ if (!ConstWidth) {
+ auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width); // Amt = 64 - Width
+ auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt); // Moves the field's top bit to bit 63.
+ B.buildInstr(SHROpc, {Dst}, {SignBit, Amt}); // Shift back down, writing the result to Dst.
+ MI.eraseFromParent(); // The original instruction has been fully replaced.
+ return;
+ }
+
+ auto WidthImm = ConstWidth->Value.getZExtValue();
+ auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc); // Split the shifted source into two 32-bit pieces.
+ Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
+ Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
+ auto Zero = B.buildConstant({VgprRB, S32}, 0);
+ unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
+
+ if (WidthImm <= 32) { // The whole field lies in the low 32-bit half.
+ // Extract Width bits at offset 0. SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
+ auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
+ MachineInstrBuilder Hi;
+ if (Signed) {
+ // High half is Lo >> 31 (arithmetic). SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
+ Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
+ } else {
+ // High half is all zeros. SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
+ Hi = Zero;
+ }
+ B.buildMergeLikeInstr(Dst, {Lo, Hi});
+ } else { // Field extends into the high half: Lo is used unchanged.
+ auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32); // Number of field bits located in the high half.
+ // Extract those bits at offset 0 of Hi. SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
+ auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
+ B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
+ }
+
+ MI.eraseFromParent(); // The original instruction has been fully replaced.
+ return;
+}
+
+void RegBankLegalizeHelper::lowerUni_BFE(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ bool Signed = isSignedBFE(MI);
+ unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
+ Register Src = MI.getOperand(FirstOpnd).getReg();
+ Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
+ Register Width = MI.getOperand(FirstOpnd + 2).getReg();
+ // For uniform bit field extract there are 4 available instructions, but
+ // LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
+ // field offset in low and size in high 16 bits.
+
+ // Src1 Hi16|Lo16 = Size|FieldOffset
+ auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
+ auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
+ auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
+ auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
+ unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+ unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
----------------
petar-avramovic wrote:
There are no tablegen patterns for S_BFE, and known-bits analysis for GlobalISel is missing support for many generic opcodes, not to mention target intrinsics. Can we leave that for another patch?
https://github.com/llvm/llvm-project/pull/132381
More information about the llvm-commits
mailing list