[llvm] [AMDGPU][NFC] Refactor SCC optimization (PR #165871)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 14:12:06 PDT 2025
https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/165871
From 9c814b60c7a42d799a9249b6be2f1d03269af008 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 09:45:49 -0500
Subject: [PATCH 1/5] Refactor SCC optimization
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 61 +++++++++++++-------------
1 file changed, 30 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d930a21c2d7f5..9dd3bedf38bd7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10628,7 +10628,31 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
return false;
- const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
+ // SCC is already valid after SCCValid.
+ // SCCRedefine will redefine SCC to the same value already available after
+ // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
+ // update kill/dead flags if necessary.
+ const auto optimizeSCC = [this](MachineInstr *SCCValid,
+ MachineInstr *SCCRedefine) -> bool {
+ MachineInstr *KillsSCC = nullptr;
+ for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+ SCCRedefine->getIterator())) {
+ if (MI.modifiesRegister(AMDGPU::SCC, &RI))
+ return false;
+ if (MI.killsRegister(AMDGPU::SCC, &RI))
+ KillsSCC = &MI;
+ }
+ if (MachineOperand *SccDef =
+ SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+ SccDef->setIsDead(false);
+ if (KillsSCC)
+ KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+ SCCRedefine->eraseFromParent();
+
+ return true;
+ };
+
+ const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
this]() -> bool {
if (CmpValue != 0)
return false;
@@ -10663,25 +10687,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
return false;
- MachineInstr *KillsSCC = nullptr;
- for (MachineInstr &MI :
- make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, &RI))
- return false;
- if (MI.killsRegister(AMDGPU::SCC, &RI))
- KillsSCC = &MI;
- }
+ if (!optimizeSCC(Def, &CmpInstr))
+ return false;
- if (MachineOperand *SccDef =
- Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
- SccDef->setIsDead(false);
- if (KillsSCC)
- KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
- CmpInstr.eraseFromParent();
return true;
};
- const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
+ const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
this](int64_t ExpectedValue, unsigned SrcSize,
bool IsReversible, bool IsSigned) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10755,21 +10767,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
return false;
- MachineInstr *KillsSCC = nullptr;
- for (MachineInstr &MI :
- make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, &RI))
- return false;
- if (MI.killsRegister(AMDGPU::SCC, &RI))
- KillsSCC = &MI;
- }
-
- MachineOperand *SccDef =
- Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
- SccDef->setIsDead(false);
- if (KillsSCC)
- KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
- CmpInstr.eraseFromParent();
+ if (!optimizeSCC(Def, &CmpInstr))
+ return false;
if (!MRI->use_nodbg_empty(DefReg)) {
assert(!IsReversedCC);
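
To make the intent of patch 1 concrete: the duplicated scan-and-update code in optimizeCmpSelect and optimizeCmpAnd becomes a single helper that walks the instructions strictly between the instruction that already leaves the wanted value in SCC and the instruction that would redefine it, gives up if anything in between clobbers SCC, and otherwise keeps the earlier SCC def alive, clears an intervening kill flag, and erases the redundant redefinition. Below is a minimal standalone sketch of that control flow; ToyMI, optimizeSCCToy and their fields are illustrative stand-ins, not LLVM API, and the real helper operates on MachineInstr iterators via findRegisterDefOperand and clearRegisterKills exactly as shown in the diff above.

// Toy model of the hoisted helper (illustrative only; not LLVM API).
#include <iterator>
#include <list>

struct ToyMI {
  bool ModifiesSCC = false; // instruction clobbers SCC
  bool KillsSCC = false;    // instruction carries a kill flag on SCC
  bool SCCDefIsDead = true; // its SCC def is currently marked dead
};

static bool optimizeSCCToy(std::list<ToyMI> &Block,
                           std::list<ToyMI>::iterator SCCValid,
                           std::list<ToyMI>::iterator SCCRedefine) {
  auto KillsSCC = Block.end();
  // Nothing between SCCValid and SCCRedefine may clobber SCC.
  for (auto I = std::next(SCCValid); I != SCCRedefine; ++I) {
    if (I->ModifiesSCC)
      return false;
    if (I->KillsSCC)
      KillsSCC = I;
  }
  SCCValid->SCCDefIsDead = false; // the earlier SCC def gains a later user
  if (KillsSCC != Block.end())
    KillsSCC->KillsSCC = false;   // drop the kill that would end SCC's range
  Block.erase(SCCRedefine);       // the redefinition is redundant
  return true;
}

int main() {
  std::list<ToyMI> Block(3); // [SCCValid, SCC-neutral instruction, SCCRedefine]
  bool Changed = optimizeSCCToy(Block, Block.begin(), std::prev(Block.end()));
  return Changed ? 0 : 1; // succeeds; Block is left with two instructions
}
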
From 4bb8bf0dd43f53de98b5510303cf214f5f2e7d64 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 09:59:20 -0500
Subject: [PATCH 2/5] Remove unnecessary this capture
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9dd3bedf38bd7..a92d0f0f81113 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10652,8 +10652,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return true;
};
- const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
- this]() -> bool {
+ const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
+ optimizeSCC]() -> bool {
if (CmpValue != 0)
return false;
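
Patch 2 is purely about lambda captures: once the body of optimizeCmpSelect stopped calling into SIInstrInfo members directly (the SCC bookkeeping moved into optimizeSCC), the captured this became unused, and compilers such as Clang can flag that under -Wunused-lambda-capture. A generic sketch of the same situation, unrelated to the LLVM code and using made-up names:

#include <cstdio>

struct Widget {
  int Bias = 1;
  void demo() {
    // Reads a member, so it legitimately captures `this`.
    auto usesMember = [this](int X) { return X + Bias; };
    // Touches only its parameter; capturing `this` here would be dead weight.
    auto noMember = [](int X) { return X * 2; };
    std::printf("%d %d\n", usesMember(1), noMember(3));
  }
};

int main() {
  Widget().demo(); // prints "2 6"
  return 0;
}
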
From 7705dbb0771ed5fb4c54c9efd3c330854b0630cf Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 13:15:50 -0500
Subject: [PATCH 3/5] Make optimizeSCC a static function
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 55 +++++++++++++-------------
1 file changed, 27 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a92d0f0f81113..777587243d2c7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10618,6 +10618,29 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
return false;
}
+// SCC is already valid after SCCValid.
+// SCCRedefine will redefine SCC to the same value already available after
+// SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
+// update kill/dead flags if necessary.
+static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
+ const SIRegisterInfo *RI) {
+ MachineInstr *KillsSCC = nullptr;
+ for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
+ SCCRedefine->getIterator())) {
+ if (MI.modifiesRegister(AMDGPU::SCC, RI))
+ return false;
+ if (MI.killsRegister(AMDGPU::SCC, RI))
+ KillsSCC = &MI;
+ }
+ if (MachineOperand *SccDef =
+ SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
+ SccDef->setIsDead(false);
+ if (KillsSCC)
+ KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
+ SCCRedefine->eraseFromParent();
+ return true;
+}
+
bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Register SrcReg2, int64_t CmpMask,
int64_t CmpValue,
@@ -10628,32 +10651,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
return false;
- // SCC is already valid after SCCValid.
- // SCCRedefine will redefine SCC to the same value already available after
- // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
- // update kill/dead flags if necessary.
- const auto optimizeSCC = [this](MachineInstr *SCCValid,
- MachineInstr *SCCRedefine) -> bool {
- MachineInstr *KillsSCC = nullptr;
- for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
- SCCRedefine->getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, &RI))
- return false;
- if (MI.killsRegister(AMDGPU::SCC, &RI))
- KillsSCC = &MI;
- }
- if (MachineOperand *SccDef =
- SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
- SccDef->setIsDead(false);
- if (KillsSCC)
- KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
- SCCRedefine->eraseFromParent();
-
- return true;
- };
-
const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
- optimizeSCC]() -> bool {
+ this]() -> bool {
if (CmpValue != 0)
return false;
@@ -10687,13 +10686,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
return false;
- if (!optimizeSCC(Def, &CmpInstr))
+ if (!optimizeSCC(Def, &CmpInstr, &RI))
return false;
return true;
};
- const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
+ const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
this](int64_t ExpectedValue, unsigned SrcSize,
bool IsReversible, bool IsSigned) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10767,7 +10766,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
return false;
- if (!optimizeSCC(Def, &CmpInstr))
+ if (!optimizeSCC(Def, &CmpInstr, &RI))
return false;
if (!MRI->use_nodbg_empty(DefReg)) {
From 3b621ae6ea8a535caf85f16c88665a11068466bc Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 13:25:54 -0500
Subject: [PATCH 4/5] Make foldableSelect a static function
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 777587243d2c7..07d016c9858fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10641,6 +10641,19 @@ static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
return true;
}
+static bool foldableSelect(MachineInstr *Def) {
+ if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
+ Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
+ bool Op1IsNonZeroImm =
+ Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
+ bool Op2IsZeroImm =
+ Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
+ if (Op1IsNonZeroImm && Op2IsZeroImm)
+ return true;
+ }
+ return false;
+}
+
bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Register SrcReg2, int64_t CmpMask,
int64_t CmpValue,
@@ -10660,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!Def || Def->getParent() != CmpInstr.getParent())
return false;
- const auto foldableSelect = [](MachineInstr *Def) -> bool {
- if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
- Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
- bool Op1IsNonZeroImm =
- Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
- bool Op2IsZeroImm =
- Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
- if (Op1IsNonZeroImm && Op2IsZeroImm)
- return true;
- }
- return false;
- };
-
// For S_OP that set SCC = DST!=0, do the transformation
//
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
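
On the helper that patch 4 hoists: foldableSelect matches an S_CSELECT_B32/B64 whose operands are a non-zero immediate and zero, so the select's result is non-zero exactly when SCC is set, which is what allows the subsequent compare against zero to be dropped (the s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero imm), 0) rewrite quoted above). A minimal standalone sketch of that predicate follows; ToySelect, ToyOpcode and foldableSelectToy are illustrative stand-ins, not LLVM API.

// Toy model of the hoisted foldableSelect check (illustrative only).
#include <cstdint>
#include <optional>

enum class ToyOpcode { S_CSELECT_B32, S_CSELECT_B64, Other };

struct ToySelect {
  ToyOpcode Opcode = ToyOpcode::Other;
  std::optional<int64_t> Op1Imm; // value selected when SCC == 1
  std::optional<int64_t> Op2Imm; // value selected when SCC == 0
};

// True iff the select is an S_CSELECT of (non-zero imm, zero imm), i.e. its
// result is non-zero exactly when SCC is set.
static bool foldableSelectToy(const ToySelect &Def) {
  if (Def.Opcode != ToyOpcode::S_CSELECT_B32 &&
      Def.Opcode != ToyOpcode::S_CSELECT_B64)
    return false;
  bool Op1IsNonZeroImm = Def.Op1Imm && *Def.Op1Imm != 0;
  bool Op2IsZeroImm = Def.Op2Imm && *Def.Op2Imm == 0;
  return Op1IsNonZeroImm && Op2IsZeroImm;
}

int main() {
  ToySelect Sel;
  Sel.Opcode = ToyOpcode::S_CSELECT_B32;
  Sel.Op1Imm = 1; // non-zero immediate
  Sel.Op2Imm = 0; // zero immediate
  return foldableSelectToy(Sel) ? 0 : 1; // foldable, so exit code 0
}
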
From 5a0b0b752f6f4776f796aebe8af7aa1886031abd Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 31 Oct 2025 16:11:53 -0500
Subject: [PATCH 5/5] Address feedback
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34 +++++++++++++-------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 07d016c9858fd..d9f76c9a59d00 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10623,13 +10623,13 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
// SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
// update kill/dead flags if necessary.
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
- const SIRegisterInfo *RI) {
+ const SIRegisterInfo &RI) {
MachineInstr *KillsSCC = nullptr;
for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
SCCRedefine->getIterator())) {
- if (MI.modifiesRegister(AMDGPU::SCC, RI))
+ if (MI.modifiesRegister(AMDGPU::SCC, &RI))
return false;
- if (MI.killsRegister(AMDGPU::SCC, RI))
+ if (MI.killsRegister(AMDGPU::SCC, &RI))
KillsSCC = &MI;
}
if (MachineOperand *SccDef =
@@ -10641,17 +10641,17 @@ static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
return true;
}
-static bool foldableSelect(MachineInstr *Def) {
- if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
- Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
- bool Op1IsNonZeroImm =
- Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
- bool Op2IsZeroImm =
- Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
- if (Op1IsNonZeroImm && Op2IsZeroImm)
- return true;
- }
- return false;
+static bool foldableSelect(const MachineInstr &Def) {
+ if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
+ Def.getOpcode() != AMDGPU::S_CSELECT_B64)
+ return false;
+ bool Op1IsNonZeroImm =
+ Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
+ bool Op2IsZeroImm =
+ Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
+ if (!Op1IsNonZeroImm || !Op2IsZeroImm)
+ return false;
+ return true;
}
bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
@@ -10683,10 +10683,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
//
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
// imm), 0)
- if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
+ if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(*Def))
return false;
- if (!optimizeSCC(Def, &CmpInstr, &RI))
+ if (!optimizeSCC(Def, &CmpInstr, RI))
return false;
return true;
@@ -10766,7 +10766,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
return false;
- if (!optimizeSCC(Def, &CmpInstr, &RI))
+ if (!optimizeSCC(Def, &CmpInstr, RI))
return false;
if (!MRI->use_nodbg_empty(DefReg)) {