[llvm] [AMDGPU] Remove redundant s_cmp_lg_* sX, 0 (PR #162352)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 8 14:04:28 PDT 2025


================
@@ -10608,6 +10608,73 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
     return false;
 
+  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
+                                  this]() -> bool {
+    if (CmpValue != 0)
+      return false;
+
+    MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
+    if (!Def || Def->getParent() != CmpInstr.getParent())
+      return false;
+
+    if (!(Def->getOpcode() == AMDGPU::S_LSHL_B32 ||
+          Def->getOpcode() == AMDGPU::S_LSHL_B64 ||
+          Def->getOpcode() == AMDGPU::S_LSHR_B32 ||
+          Def->getOpcode() == AMDGPU::S_LSHR_B64 ||
+          Def->getOpcode() == AMDGPU::S_AND_B32 ||
+          Def->getOpcode() == AMDGPU::S_AND_B64 ||
+          Def->getOpcode() == AMDGPU::S_OR_B32 ||
+          Def->getOpcode() == AMDGPU::S_OR_B64 ||
+          Def->getOpcode() == AMDGPU::S_XOR_B32 ||
+          Def->getOpcode() == AMDGPU::S_XOR_B64 ||
+          Def->getOpcode() == AMDGPU::S_NAND_B32 ||
+          Def->getOpcode() == AMDGPU::S_NAND_B64 ||
+          Def->getOpcode() == AMDGPU::S_NOR_B32 ||
+          Def->getOpcode() == AMDGPU::S_NOR_B64 ||
+          Def->getOpcode() == AMDGPU::S_XNOR_B32 ||
+          Def->getOpcode() == AMDGPU::S_XNOR_B64 ||
+          Def->getOpcode() == AMDGPU::S_ANDN2_B32 ||
+          Def->getOpcode() == AMDGPU::S_ANDN2_B64 ||
+          Def->getOpcode() == AMDGPU::S_ORN2_B32 ||
+          Def->getOpcode() == AMDGPU::S_ORN2_B64 ||
+          Def->getOpcode() == AMDGPU::S_BFE_I32 ||
+          Def->getOpcode() == AMDGPU::S_BFE_I64 ||
+          Def->getOpcode() == AMDGPU::S_BFE_U32 ||
+          Def->getOpcode() == AMDGPU::S_BFE_U64 ||
+          Def->getOpcode() == AMDGPU::S_BCNT0_I32_B32 ||
+          Def->getOpcode() == AMDGPU::S_BCNT0_I32_B64 ||
+          Def->getOpcode() == AMDGPU::S_BCNT1_I32_B32 ||
+          Def->getOpcode() == AMDGPU::S_BCNT1_I32_B64 ||
+          Def->getOpcode() == AMDGPU::S_QUADMASK_B32 ||
+          Def->getOpcode() == AMDGPU::S_QUADMASK_B64 ||
+          Def->getOpcode() == AMDGPU::S_NOT_B32 ||
+          Def->getOpcode() == AMDGPU::S_NOT_B64 ||
+
+          ((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
+            Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
+           Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
+           !Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())))
+      return false;
+
+    for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
+         I != E; ++I) {
+      if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
+          I->killsRegister(AMDGPU::SCC, &RI))
+        return false;
+    }
+
+    if (!(Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
+          Def->getOpcode() == AMDGPU::S_CSELECT_B64)) {
----------------
LU-JOHN wrote:

Recoded to check for opcodes that define SCC, while excluding those that do not set SCC = (DST != 0).

https://github.com/llvm/llvm-project/pull/162352


More information about the llvm-commits mailing list