[llvm] [AMDGPU][NFC] Refactor SCC optimization (PR #165871)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 31 14:12:06 PDT 2025


https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/165871

>From 9c814b60c7a42d799a9249b6be2f1d03269af008 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 09:45:49 -0500
Subject: [PATCH 1/5] Refactor SCC optimization

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 61 +++++++++++++-------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d930a21c2d7f5..9dd3bedf38bd7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10628,7 +10628,31 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
     return false;
 
-  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
+  // SCC is already valid after SCCValid.
+  // SCCRedefine will redefine SCC to the same value already available after
+  // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
+  // update kill/dead flags if necessary.
+  const auto optimizeSCC = [this](MachineInstr *SCCValid,
+                                  MachineInstr *SCCRedefine) -> bool {
+    MachineInstr *KillsSCC = nullptr;
+    for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+                                       SCCRedefine->getIterator())) {
+      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
+        return false;
+      if (MI.killsRegister(AMDGPU::SCC, &RI))
+        KillsSCC = &MI;
+    }
+    if (MachineOperand *SccDef =
+            SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+      SccDef->setIsDead(false);
+    if (KillsSCC)
+      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+    SCCRedefine->eraseFromParent();
+
+    return true;
+  };
+
+  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
                                   this]() -> bool {
     if (CmpValue != 0)
       return false;
@@ -10663,25 +10687,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
       return false;
 
-    MachineInstr *KillsSCC = nullptr;
-    for (MachineInstr &MI :
-         make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
-      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
-        return false;
-      if (MI.killsRegister(AMDGPU::SCC, &RI))
-        KillsSCC = &MI;
-    }
+    if (!optimizeSCC(Def, &CmpInstr))
+      return false;
 
-    if (MachineOperand *SccDef =
-            Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
-      SccDef->setIsDead(false);
-    if (KillsSCC)
-      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
-    CmpInstr.eraseFromParent();
     return true;
   };
 
-  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
+  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
                                this](int64_t ExpectedValue, unsigned SrcSize,
                                      bool IsReversible, bool IsSigned) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10755,21 +10767,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
       return false;
 
-    MachineInstr *KillsSCC = nullptr;
-    for (MachineInstr &MI :
-         make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
-      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
-        return false;
-      if (MI.killsRegister(AMDGPU::SCC, &RI))
-        KillsSCC = &MI;
-    }
-
-    MachineOperand *SccDef =
-        Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
-    SccDef->setIsDead(false);
-    if (KillsSCC)
-      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
-    CmpInstr.eraseFromParent();
+    if (!optimizeSCC(Def, &CmpInstr))
+      return false;
 
     if (!MRI->use_nodbg_empty(DefReg)) {
       assert(!IsReversedCC);

>From 4bb8bf0dd43f53de98b5510303cf214f5f2e7d64 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 09:59:20 -0500
Subject: [PATCH 2/5] Remove unnecessary this capture

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9dd3bedf38bd7..a92d0f0f81113 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10652,8 +10652,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     return true;
   };
 
-  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
-                                  this]() -> bool {
+  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
+                                  optimizeSCC]() -> bool {
     if (CmpValue != 0)
       return false;
 

>From 7705dbb0771ed5fb4c54c9efd3c330854b0630cf Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 13:15:50 -0500
Subject: [PATCH 3/5] Make optimizeSCC a static function

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 55 +++++++++++++-------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a92d0f0f81113..777587243d2c7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10618,6 +10618,29 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   return false;
 }
 
+// SCC is already valid after SCCValid.
+// SCCRedefine will redefine SCC to the same value already available after
+// SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
+// update kill/dead flags if necessary.
+static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
+                        const SIRegisterInfo *RI) {
+  MachineInstr *KillsSCC = nullptr;
+  for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+                                     SCCRedefine->getIterator())) {
+    if (MI.modifiesRegister(AMDGPU::SCC, RI))
+      return false;
+    if (MI.killsRegister(AMDGPU::SCC, RI))
+      KillsSCC = &MI;
+  }
+  if (MachineOperand *SccDef =
+          SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+    SccDef->setIsDead(false);
+  if (KillsSCC)
+    KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+  SCCRedefine->eraseFromParent();
+  return true;
+}
+
 bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                                        Register SrcReg2, int64_t CmpMask,
                                        int64_t CmpValue,
@@ -10628,32 +10651,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
     return false;
 
-  // SCC is already valid after SCCValid.
-  // SCCRedefine will redefine SCC to the same value already available after
-  // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
-  // update kill/dead flags if necessary.
-  const auto optimizeSCC = [this](MachineInstr *SCCValid,
-                                  MachineInstr *SCCRedefine) -> bool {
-    MachineInstr *KillsSCC = nullptr;
-    for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
-                                       SCCRedefine->getIterator())) {
-      if (MI.modifiesRegister(AMDGPU::SCC, &RI))
-        return false;
-      if (MI.killsRegister(AMDGPU::SCC, &RI))
-        KillsSCC = &MI;
-    }
-    if (MachineOperand *SccDef =
-            SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
-      SccDef->setIsDead(false);
-    if (KillsSCC)
-      KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
-    SCCRedefine->eraseFromParent();
-
-    return true;
-  };
-
   const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
-                                  optimizeSCC]() -> bool {
+                                  this]() -> bool {
     if (CmpValue != 0)
       return false;
 
@@ -10687,13 +10686,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
       return false;
 
-    if (!optimizeSCC(Def, &CmpInstr))
+    if (!optimizeSCC(Def, &CmpInstr, &RI))
       return false;
 
     return true;
   };
 
-  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
+  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                                this](int64_t ExpectedValue, unsigned SrcSize,
                                      bool IsReversible, bool IsSigned) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10767,7 +10766,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
       return false;
 
-    if (!optimizeSCC(Def, &CmpInstr))
+    if (!optimizeSCC(Def, &CmpInstr, &RI))
       return false;
 
     if (!MRI->use_nodbg_empty(DefReg)) {

>From 3b621ae6ea8a535caf85f16c88665a11068466bc Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 13:25:54 -0500
Subject: [PATCH 4/5] Make foldableSelect a static function

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 777587243d2c7..07d016c9858fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10641,6 +10641,19 @@ static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
   return true;
 }
 
+static bool foldableSelect(MachineInstr *Def) {
+  if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
+      Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
+    bool Op1IsNonZeroImm =
+        Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
+    bool Op2IsZeroImm =
+        Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
+    if (Op1IsNonZeroImm && Op2IsZeroImm)
+      return true;
+  }
+  return false;
+}
+
 bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                                        Register SrcReg2, int64_t CmpMask,
                                        int64_t CmpValue,
@@ -10660,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (!Def || Def->getParent() != CmpInstr.getParent())
       return false;
 
-    const auto foldableSelect = [](MachineInstr *Def) -> bool {
-      if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
-          Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
-        bool Op1IsNonZeroImm =
-            Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
-        bool Op2IsZeroImm =
-            Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
-        if (Op1IsNonZeroImm && Op2IsZeroImm)
-          return true;
-      }
-      return false;
-    };
-
     // For S_OP that set SCC = DST!=0, do the transformation
     //
     //   s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)

>From 5a0b0b752f6f4776f796aebe8af7aa1886031abd Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 16:11:53 -0500
Subject: [PATCH 5/5] Address feedback

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34 +++++++++++++-------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 07d016c9858fd..d9f76c9a59d00 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10623,13 +10623,13 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
 // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
 // update kill/dead flags if necessary.
 static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
-                        const SIRegisterInfo *RI) {
+                        const SIRegisterInfo &RI) {
   MachineInstr *KillsSCC = nullptr;
   for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
                                      SCCRedefine->getIterator())) {
-    if (MI.modifiesRegister(AMDGPU::SCC, RI))
+    if (MI.modifiesRegister(AMDGPU::SCC, &RI))
       return false;
-    if (MI.killsRegister(AMDGPU::SCC, RI))
+    if (MI.killsRegister(AMDGPU::SCC, &RI))
       KillsSCC = &MI;
   }
   if (MachineOperand *SccDef =
@@ -10641,17 +10641,17 @@ static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
   return true;
 }
 
-static bool foldableSelect(MachineInstr *Def) {
-  if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
-      Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
-    bool Op1IsNonZeroImm =
-        Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
-    bool Op2IsZeroImm =
-        Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
-    if (Op1IsNonZeroImm && Op2IsZeroImm)
-      return true;
-  }
-  return false;
+static bool foldableSelect(const MachineInstr &Def) {
+  if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
+      Def.getOpcode() != AMDGPU::S_CSELECT_B64)
+    return false;
+  bool Op1IsNonZeroImm =
+      Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
+  bool Op2IsZeroImm =
+      Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
+  if (!Op1IsNonZeroImm || !Op2IsZeroImm)
+    return false;
+  return true;
 }
 
 bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
@@ -10683,10 +10683,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     //
     //   s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
     //   imm), 0)
-    if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
+    if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(*Def))
       return false;
 
-    if (!optimizeSCC(Def, &CmpInstr, &RI))
+    if (!optimizeSCC(Def, &CmpInstr, RI))
       return false;
 
     return true;
@@ -10766,7 +10766,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
       return false;
 
-    if (!optimizeSCC(Def, &CmpInstr, &RI))
+    if (!optimizeSCC(Def, &CmpInstr, RI))
       return false;
 
     if (!MRI->use_nodbg_empty(DefReg)) {



More information about the llvm-commits mailing list