[llvm] [LLVM] Make use of s_flbit_i32_b64 and s_ff1_i32_b64 (PR #75158)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 07:53:07 PST 2023
================
@@ -7834,6 +7843,62 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
+void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
+ MachineInstr &Inst, unsigned Opcode,
+ MachineDominatorTree *MDT) const {
+ // (S_FLBIT_I32_B64 hi:lo) ->
+ // -> (umin (S_FLBIT_I32_B32 hi), (uaddsat (S_FLBIT_I32_B32 lo), 32))
+ // (S_FF1_I32_B64 hi:lo) ->
+ // ->(umin (uaddsat (S_FF1_I32_B32 hi), 32) (S_FF1_I32_B32 lo))
+
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src = Inst.getOperand(1);
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+ bool IsCtlz = Opcode == AMDGPU::S_FLBIT_I32_B32;
+
+ const TargetRegisterClass *SrcRC =
+ Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *SrcSubRC =
+ RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
+
+ MachineOperand SrcRegSub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC);
+
+ MachineOperand SrcRegSub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC);
+
+ Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ MachineInstr *M1 = BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0);
+ MachineInstr *M2 = BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_ADD_I32), MidReg3)
----------------
jayfoad wrote:
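Doesn't this need to be an unsigned saturating add? The comment at the top of the function describes the expansion with `uaddsat`, but this emits a plain S_ADD_I32.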
https://github.com/llvm/llvm-project/pull/75158
More information about the llvm-commits mailing list