[llvm] [AMDGPU][NFC] Refactor SCC optimization (PR #165871)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 31 13:25:46 PDT 2025


https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/165871

>From 9c814b60c7a42d799a9249b6be2f1d03269af008 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 09:45:49 -0500
Subject: [PATCH 1/4] Refactor SCC optimization

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 61 +++++++++++++-------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d930a21c2d7f5..9dd3bedf38bd7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10628,7 +10628,31 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
     return false;
 
-  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
+  // SCC is already valid after SCCValid.
+  // SCCRedefine will redefine SCC to the same value already available after
+  // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
+  // update kill/dead flags if necessary.
+  const auto optimizeSCC = [this](MachineInstr *SCCValid,
+                                  MachineInstr *SCCRedefine) -> bool {
+    MachineInstr *KillsSCC = nullptr;
+    for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+                                       SCCRedefine->getIterator())) {
+      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
+        return false;
+      if (MI.killsRegister(AMDGPU::SCC, &RI))
+        KillsSCC = &MI;
+    }
+    if (MachineOperand *SccDef =
+            SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+      SccDef->setIsDead(false);
+    if (KillsSCC)
+      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+    SCCRedefine->eraseFromParent();
+
+    return true;
+  };
+
+  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
                                   this]() -> bool {
     if (CmpValue != 0)
       return false;
@@ -10663,25 +10687,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
       return false;
 
-    MachineInstr *KillsSCC = nullptr;
-    for (MachineInstr &MI :
-         make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
-      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
-        return false;
-      if (MI.killsRegister(AMDGPU::SCC, &RI))
-        KillsSCC = &MI;
-    }
+    if (!optimizeSCC(Def, &CmpInstr))
+      return false;
 
-    if (MachineOperand *SccDef =
-            Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
-      SccDef->setIsDead(false);
-    if (KillsSCC)
-      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
-    CmpInstr.eraseFromParent();
     return true;
   };
 
-  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
+  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
                                this](int64_t ExpectedValue, unsigned SrcSize,
                                      bool IsReversible, bool IsSigned) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10755,21 +10767,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
       return false;
 
-    MachineInstr *KillsSCC = nullptr;
-    for (MachineInstr &MI :
-         make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
-      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
-        return false;
-      if (MI.killsRegister(AMDGPU::SCC, &RI))
-        KillsSCC = &MI;
-    }
-
-    MachineOperand *SccDef =
-        Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
-    SccDef->setIsDead(false);
-    if (KillsSCC)
-      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
-    CmpInstr.eraseFromParent();
+    if (!optimizeSCC(Def, &CmpInstr))
+      return false;
 
     if (!MRI->use_nodbg_empty(DefReg)) {
       assert(!IsReversedCC);

>From 4bb8bf0dd43f53de98b5510303cf214f5f2e7d64 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 09:59:20 -0500
Subject: [PATCH 2/4] Remove unnecessary this capture

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9dd3bedf38bd7..a92d0f0f81113 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10652,8 +10652,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     return true;
   };
 
-  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
-                                  this]() -> bool {
+  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
+                                  optimizeSCC]() -> bool {
     if (CmpValue != 0)
       return false;
 

>From 7705dbb0771ed5fb4c54c9efd3c330854b0630cf Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 13:15:50 -0500
Subject: [PATCH 3/4] Make optimizeSCC a static function

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 55 +++++++++++++-------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a92d0f0f81113..777587243d2c7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10618,6 +10618,29 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   return false;
 }
 
+// SCC is already valid after SCCValid.
+// SCCRedefine will redefine SCC to the same value already available after
+// SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
+// update kill/dead flags if necessary.
+static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
+                        const SIRegisterInfo *RI) {
+  MachineInstr *KillsSCC = nullptr;
+  for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+                                     SCCRedefine->getIterator())) {
+    if (MI.modifiesRegister(AMDGPU::SCC, RI))
+      return false;
+    if (MI.killsRegister(AMDGPU::SCC, RI))
+      KillsSCC = &MI;
+  }
+  if (MachineOperand *SccDef =
+          SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+    SccDef->setIsDead(false);
+  if (KillsSCC)
+    KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+  SCCRedefine->eraseFromParent();
+  return true;
+}
+
 bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                                        Register SrcReg2, int64_t CmpMask,
                                        int64_t CmpValue,
@@ -10628,32 +10651,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
     return false;
 
-  // SCC is already valid after SCCValid.
-  // SCCRedefine will redefine SCC to the same value already available after
-  // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
-  // update kill/dead flags if necessary.
-  const auto optimizeSCC = [this](MachineInstr *SCCValid,
-                                  MachineInstr *SCCRedefine) -> bool {
-    MachineInstr *KillsSCC = nullptr;
-    for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
-                                       SCCRedefine->getIterator())) {
-      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
-        return false;
-      if (MI.killsRegister(AMDGPU::SCC, &RI))
-        KillsSCC = &MI;
-    }
-    if (MachineOperand *SccDef =
-            SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
-      SccDef->setIsDead(false);
-    if (KillsSCC)
-      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
-    SCCRedefine->eraseFromParent();
-
-    return true;
-  };
-
   const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
-                                  optimizeSCC]() -> bool {
+                                  this]() -> bool {
     if (CmpValue != 0)
       return false;
 
@@ -10687,13 +10686,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
       return false;
 
-    if (!optimizeSCC(Def, &CmpInstr))
+    if (!optimizeSCC(Def, &CmpInstr, &RI))
       return false;
 
     return true;
   };
 
-  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
+  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                                this](int64_t ExpectedValue, unsigned SrcSize,
                                      bool IsReversible, bool IsSigned) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10767,7 +10766,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
       return false;
 
-    if (!optimizeSCC(Def, &CmpInstr))
+    if (!optimizeSCC(Def, &CmpInstr, &RI))
       return false;
 
     if (!MRI->use_nodbg_empty(DefReg)) {

>From 3b621ae6ea8a535caf85f16c88665a11068466bc Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 13:25:54 -0500
Subject: [PATCH 4/4] Make foldableSelect a static function

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 777587243d2c7..07d016c9858fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10641,6 +10641,19 @@ static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
   return true;
 }
 
+static bool foldableSelect(MachineInstr *Def) {
+  if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
+      Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
+    bool Op1IsNonZeroImm =
+        Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
+    bool Op2IsZeroImm =
+        Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
+    if (Op1IsNonZeroImm && Op2IsZeroImm)
+      return true;
+  }
+  return false;
+}
+
 bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                                        Register SrcReg2, int64_t CmpMask,
                                        int64_t CmpValue,
@@ -10660,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (!Def || Def->getParent() != CmpInstr.getParent())
       return false;
 
-    const auto foldableSelect = [](MachineInstr *Def) -> bool {
-      if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
-          Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
-        bool Op1IsNonZeroImm =
-            Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
-        bool Op2IsZeroImm =
-            Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
-        if (Op1IsNonZeroImm && Op2IsZeroImm)
-          return true;
-      }
-      return false;
-    };
-
     // For S_OP that set SCC = DST!=0, do the transformation
     //
     //   s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)



More information about the llvm-commits mailing list