[llvm] [LLVM] Make use of s_flbit_i32_b64 and s_ff1_i32_b64 (PR #75158)

Acim Maravic via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 03:16:24 PST 2023


================
@@ -7834,6 +7843,62 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 }
 
+void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
+                                          MachineInstr &Inst, unsigned Opcode,
+                                          MachineDominatorTree *MDT) const {
+  //  (S_FLBIT_I32_B64 hi:lo) ->
+  // -> (umin (S_FLBIT_I32_B32 hi), (uaddsat (S_FLBIT_I32_B32 lo), 32))
+  //  (S_FF1_I32_B64 hi:lo) ->
+  // ->(umin (uaddsat (S_FF1_I32_B32 hi), 32) (S_FF1_I32_B32 lo))
+
+  MachineBasicBlock &MBB = *Inst.getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  MachineBasicBlock::iterator MII = Inst;
+  const DebugLoc &DL = Inst.getDebugLoc();
+
+  MachineOperand &Dest = Inst.getOperand(0);
+  MachineOperand &Src = Inst.getOperand(1);
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+  bool IsCtlz = Opcode == AMDGPU::S_FLBIT_I32_B32;
+
+  const TargetRegisterClass *SrcRC =
+      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
+
+  const TargetRegisterClass *SrcSubRC =
+      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
+
+  MachineOperand SrcRegSub0 =
+      buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC);
+
+  MachineOperand SrcRegSub1 =
+      buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC);
+
+  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+  MachineInstr *M1 = BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0);
+  MachineInstr *M2 = BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1);
+
+  BuildMI(MBB, MII, DL, get(AMDGPU::S_ADD_I32), MidReg3)
----------------
Acim-Maravic wrote:

After that, the function SIInstrInfo::getVALUOp is called, which for S_ADD_I32 returns V_ADD_U32_e64 or V_ADD_CO_U32_e32, both of which are unsigned saturating adds. Please correct me if I am wrong.


https://github.com/llvm/llvm-project/pull/75158


More information about the llvm-commits mailing list