[llvm] [AMDGPU] Fold dst = v_add 0, src -> src (PR #163298)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 14 10:20:35 PDT 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/163298
>From fde9ab2695c7f9f8c3fede8678914627d675a0bc Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 2 Oct 2025 16:57:48 -0700
Subject: [PATCH 1/8] NFC: Refactor tryFoldZeroHiBits
Change-Id: Ice882043dc3171eedd08049bb05ef5a046dbf94a
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 45 +++++++++++++----------
1 file changed, 26 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 51c56ecea2c96..93d0653b4ef54 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -245,7 +245,7 @@ class SIFoldOperandsImpl {
std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
bool tryConstantFoldOp(MachineInstr *MI) const;
bool tryFoldCndMask(MachineInstr &MI) const;
- bool tryFoldZeroHighBits(MachineInstr &MI) const;
+ bool tryFoldArithmetic(MachineInstr &MI) const;
bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;
bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;
@@ -1730,26 +1730,33 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
return true;
}
-bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
- if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
- MI.getOpcode() != AMDGPU::V_AND_B32_e32)
- return false;
+bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
+ unsigned Opc = MI.getOpcode();
- std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
- if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
- return false;
+ switch (Opc) {
+ default:
+ return false;
+ case AMDGPU::V_AND_B32_e64:
+ case AMDGPU::V_AND_B32_e32: {
+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
+ if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
+ return false;
- Register Src1 = MI.getOperand(2).getReg();
- MachineInstr *SrcDef = MRI->getVRegDef(Src1);
- if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
- return false;
+ Register Src1 = MI.getOperand(2).getReg();
+ MachineInstr *SrcDef = MRI->getVRegDef(Src1);
+ if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
+ return false;
- Register Dst = MI.getOperand(0).getReg();
- MRI->replaceRegWith(Dst, Src1);
- if (!MI.getOperand(2).isKill())
- MRI->clearKillFlags(Src1);
- MI.eraseFromParent();
- return true;
+ Register Dst = MI.getOperand(0).getReg();
+ MRI->replaceRegWith(Dst, Src1);
+ if (!MI.getOperand(2).isKill())
+ MRI->clearKillFlags(Src1);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
}
bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
@@ -2790,7 +2797,7 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) {
for (auto &MI : make_early_inc_range(*MBB)) {
Changed |= tryFoldCndMask(MI);
- if (tryFoldZeroHighBits(MI)) {
+ if (tryFoldArithmetic(MI)) {
Changed = true;
continue;
}
>From 3c1c5a4ac995ed0ce017a66bab6a734283181f5c Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 2 Oct 2025 17:21:13 -0700
Subject: [PATCH 2/8] [AMDGPU] Fold dst = v_add 0, src -> src
Change-Id: I9b1162d93722f33eb5067502baf87590bf861e3c
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 32 ++++++++++++++-----
.../CodeGen/AMDGPU/groupstaticsize-zero.ll | 20 ++++++++++++
2 files changed, 44 insertions(+), 8 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 93d0653b4ef54..382360150d42f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1733,6 +1733,18 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
+ auto replaceAndFold = [this](MachineOperand &NewOp, MachineOperand &OldOp,
+ MachineInstr &MI) -> bool {
+ if (!(NewOp.isReg() && OldOp.isReg()))
+ return false;
+ Register OldReg = OldOp.getReg();
+ MRI->replaceRegWith(NewOp.getReg(), OldReg);
+ if (!OldOp.isKill())
+ MRI->clearKillFlags(OldReg);
+ MI.eraseFromParent();
+ return true;
+ };
+
switch (Opc) {
default:
return false;
@@ -1742,17 +1754,21 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
return false;
- Register Src1 = MI.getOperand(2).getReg();
- MachineInstr *SrcDef = MRI->getVRegDef(Src1);
+ MachineOperand &Src1Op = MI.getOperand(2);
+ MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg());
if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
return false;
- Register Dst = MI.getOperand(0).getReg();
- MRI->replaceRegWith(Dst, Src1);
- if (!MI.getOperand(2).isKill())
- MRI->clearKillFlags(Src1);
- MI.eraseFromParent();
- return true;
+ return replaceAndFold(MI.getOperand(0), Src1Op, MI);
+ }
+ case AMDGPU::V_ADD_U32_e64:
+ case AMDGPU::V_ADD_U32_e32: {
+ std::optional<int64_t> Src0Imm =
+ getImmOrMaterializedImm(MI.getOperand(1));
+ if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg())
+ return false;
+
+ return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
new file mode 100644
index 0000000000000..e52eb8aca9f84
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s
+
+@global_smem = external addrspace(3) global [0 x i8]
+
+define amdgpu_kernel void @addzero() {
+; GCN-LABEL: addzero:
+; GCN: ; %bb.0: ; %.lr.ph
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: v_and_b32_e32 v0, 1, v0
+; GCN-NEXT: v_mov_b32_e32 v3, v2
+; GCN-NEXT: ds_write_b64 v0, v[2:3]
+; GCN-NEXT: s_endpgm
+.lr.ph:
+ %0 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = and i32 %0, 1
+ %2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1
+ store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8
+ ret void
+}
>From 69e60fb368930fd6d440ad4ff7b893b1dd5b5d1b Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 13 Oct 2025 18:00:20 -0700
Subject: [PATCH 3/8] Formatting
Change-Id: If2b8c5e64be9291fb92f433542cd926be3193027
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 39 +++++++++++------------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 382360150d42f..115a429ea303a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1746,30 +1746,29 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
};
switch (Opc) {
- default:
+ default:
+ return false;
+ case AMDGPU::V_AND_B32_e64:
+ case AMDGPU::V_AND_B32_e32: {
+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
+ if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
return false;
- case AMDGPU::V_AND_B32_e64:
- case AMDGPU::V_AND_B32_e32: {
- std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
- if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
- return false;
- MachineOperand &Src1Op = MI.getOperand(2);
- MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg());
- if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
- return false;
+ MachineOperand &Src1Op = MI.getOperand(2);
+ MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg());
+ if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
+ return false;
- return replaceAndFold(MI.getOperand(0), Src1Op, MI);
- }
- case AMDGPU::V_ADD_U32_e64:
- case AMDGPU::V_ADD_U32_e32: {
- std::optional<int64_t> Src0Imm =
- getImmOrMaterializedImm(MI.getOperand(1));
- if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg())
- return false;
+ return replaceAndFold(MI.getOperand(0), Src1Op, MI);
+ }
+ case AMDGPU::V_ADD_U32_e64:
+ case AMDGPU::V_ADD_U32_e32: {
+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
+ if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg())
+ return false;
- return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI);
- }
+ return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI);
+ }
}
return false;
>From 62866dfdd5a55b6749a70f236e51e8b91aa49326 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 14 Oct 2025 10:12:29 -0700
Subject: [PATCH 4/8] Revert "Formatting"
This reverts commit 69e60fb368930fd6d440ad4ff7b893b1dd5b5d1b.
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 39 ++++++++++++-----------
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 115a429ea303a..382360150d42f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1746,29 +1746,30 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
};
switch (Opc) {
- default:
- return false;
- case AMDGPU::V_AND_B32_e64:
- case AMDGPU::V_AND_B32_e32: {
- std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
- if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
+ default:
return false;
+ case AMDGPU::V_AND_B32_e64:
+ case AMDGPU::V_AND_B32_e32: {
+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
+ if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
+ return false;
- MachineOperand &Src1Op = MI.getOperand(2);
- MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg());
- if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
- return false;
+ MachineOperand &Src1Op = MI.getOperand(2);
+ MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg());
+ if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
+ return false;
- return replaceAndFold(MI.getOperand(0), Src1Op, MI);
- }
- case AMDGPU::V_ADD_U32_e64:
- case AMDGPU::V_ADD_U32_e32: {
- std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
- if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg())
- return false;
+ return replaceAndFold(MI.getOperand(0), Src1Op, MI);
+ }
+ case AMDGPU::V_ADD_U32_e64:
+ case AMDGPU::V_ADD_U32_e32: {
+ std::optional<int64_t> Src0Imm =
+ getImmOrMaterializedImm(MI.getOperand(1));
+ if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg())
+ return false;
- return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI);
- }
+ return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI);
+ }
}
return false;
>From 24501fe0db668cb007436da6f79b79027a28a766 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 14 Oct 2025 10:12:37 -0700
Subject: [PATCH 5/8] Revert "[AMDGPU] Fold dst = v_add 0, src -> src"
This reverts commit 3c1c5a4ac995ed0ce017a66bab6a734283181f5c.
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 32 +++++--------------
.../CodeGen/AMDGPU/groupstaticsize-zero.ll | 20 ------------
2 files changed, 8 insertions(+), 44 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 382360150d42f..93d0653b4ef54 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1733,18 +1733,6 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
- auto replaceAndFold = [this](MachineOperand &NewOp, MachineOperand &OldOp,
- MachineInstr &MI) -> bool {
- if (!(NewOp.isReg() && OldOp.isReg()))
- return false;
- Register OldReg = OldOp.getReg();
- MRI->replaceRegWith(NewOp.getReg(), OldReg);
- if (!OldOp.isKill())
- MRI->clearKillFlags(OldReg);
- MI.eraseFromParent();
- return true;
- };
-
switch (Opc) {
default:
return false;
@@ -1754,21 +1742,17 @@ bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
return false;
- MachineOperand &Src1Op = MI.getOperand(2);
- MachineInstr *SrcDef = MRI->getVRegDef(Src1Op.getReg());
+ Register Src1 = MI.getOperand(2).getReg();
+ MachineInstr *SrcDef = MRI->getVRegDef(Src1);
if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
return false;
- return replaceAndFold(MI.getOperand(0), Src1Op, MI);
- }
- case AMDGPU::V_ADD_U32_e64:
- case AMDGPU::V_ADD_U32_e32: {
- std::optional<int64_t> Src0Imm =
- getImmOrMaterializedImm(MI.getOperand(1));
- if (!Src0Imm || *Src0Imm != 0 || !MI.getOperand(2).isReg())
- return false;
-
- return replaceAndFold(MI.getOperand(0), MI.getOperand(2), MI);
+ Register Dst = MI.getOperand(0).getReg();
+ MRI->replaceRegWith(Dst, Src1);
+ if (!MI.getOperand(2).isKill())
+ MRI->clearKillFlags(Src1);
+ MI.eraseFromParent();
+ return true;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
deleted file mode 100644
index e52eb8aca9f84..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s
-
-@global_smem = external addrspace(3) global [0 x i8]
-
-define amdgpu_kernel void @addzero() {
-; GCN-LABEL: addzero:
-; GCN: ; %bb.0: ; %.lr.ph
-; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: v_and_b32_e32 v0, 1, v0
-; GCN-NEXT: v_mov_b32_e32 v3, v2
-; GCN-NEXT: ds_write_b64 v0, v[2:3]
-; GCN-NEXT: s_endpgm
-.lr.ph:
- %0 = tail call i32 @llvm.amdgcn.workitem.id.x()
- %1 = and i32 %0, 1
- %2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1
- store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8
- ret void
-}
>From 7434565e0a73c40cb6f0340d8274e52be0f553ff Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 14 Oct 2025 10:12:44 -0700
Subject: [PATCH 6/8] Revert "NFC: Refactor tryFoldZeroHiBits"
This reverts commit fde9ab2695c7f9f8c3fede8678914627d675a0bc.
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 45 ++++++++++-------------
1 file changed, 19 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 93d0653b4ef54..51c56ecea2c96 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -245,7 +245,7 @@ class SIFoldOperandsImpl {
std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
bool tryConstantFoldOp(MachineInstr *MI) const;
bool tryFoldCndMask(MachineInstr &MI) const;
- bool tryFoldArithmetic(MachineInstr &MI) const;
+ bool tryFoldZeroHighBits(MachineInstr &MI) const;
bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;
bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;
@@ -1730,33 +1730,26 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
return true;
}
-bool SIFoldOperandsImpl::tryFoldArithmetic(MachineInstr &MI) const {
- unsigned Opc = MI.getOpcode();
-
- switch (Opc) {
- default:
- return false;
- case AMDGPU::V_AND_B32_e64:
- case AMDGPU::V_AND_B32_e32: {
- std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
- if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
- return false;
+bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
+ if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
+ MI.getOpcode() != AMDGPU::V_AND_B32_e32)
+ return false;
- Register Src1 = MI.getOperand(2).getReg();
- MachineInstr *SrcDef = MRI->getVRegDef(Src1);
- if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
- return false;
+ std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
+ if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())
+ return false;
- Register Dst = MI.getOperand(0).getReg();
- MRI->replaceRegWith(Dst, Src1);
- if (!MI.getOperand(2).isKill())
- MRI->clearKillFlags(Src1);
- MI.eraseFromParent();
- return true;
- }
- }
+ Register Src1 = MI.getOperand(2).getReg();
+ MachineInstr *SrcDef = MRI->getVRegDef(Src1);
+ if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
+ return false;
- return false;
+ Register Dst = MI.getOperand(0).getReg();
+ MRI->replaceRegWith(Dst, Src1);
+ if (!MI.getOperand(2).isKill())
+ MRI->clearKillFlags(Src1);
+ MI.eraseFromParent();
+ return true;
}
bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
@@ -2797,7 +2790,7 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) {
for (auto &MI : make_early_inc_range(*MBB)) {
Changed |= tryFoldCndMask(MI);
- if (tryFoldArithmetic(MI)) {
+ if (tryFoldZeroHighBits(MI)) {
Changed = true;
continue;
}
>From ec17ae2eee170a12d11272e6086e4336de0616b1 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 14 Oct 2025 10:18:55 -0700
Subject: [PATCH 7/8] Move to tryConstantFoldOp
Change-Id: I9b14559b4b5dc9c4bb383ebd517edcdc094a2e6c
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 8 ++++++++
.../CodeGen/AMDGPU/groupstaticsize-zero.ll | 20 +++++++++++++++++++
2 files changed, 28 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 51c56ecea2c96..eefdcf6d0d1ab 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1684,6 +1684,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
}
}
+ if (Opc == AMDGPU::V_ADD_U32_e64 || Opc == AMDGPU::V_ADD_U32_e32) {
+ if (Src1Val == 0) {
+ // y = add x, 0 -> y = copy x
+ MI->removeOperand(Src1Idx);
+ mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+ }
+ }
+
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
new file mode 100644
index 0000000000000..e52eb8aca9f84
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/groupstaticsize-zero.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN %s
+
+@global_smem = external addrspace(3) global [0 x i8]
+
+define amdgpu_kernel void @addzero() {
+; GCN-LABEL: addzero:
+; GCN: ; %bb.0: ; %.lr.ph
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: v_and_b32_e32 v0, 1, v0
+; GCN-NEXT: v_mov_b32_e32 v3, v2
+; GCN-NEXT: ds_write_b64 v0, v[2:3]
+; GCN-NEXT: s_endpgm
+.lr.ph:
+ %0 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = and i32 %0, 1
+ %2 = getelementptr i8, ptr addrspace(3) @global_smem, i32 %1
+ store <4 x bfloat> zeroinitializer, ptr addrspace(3) %2, align 8
+ ret void
+}
>From 1b17398ed6bc6271536e7ad61acb239208238391 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 14 Oct 2025 10:20:14 -0700
Subject: [PATCH 8/8] Return true for changed
Change-Id: I8e7681f4920c9dee1bb4fb1b303c4c886c1969e3
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index eefdcf6d0d1ab..913f49503660f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1689,6 +1689,7 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
// y = add x, 0 -> y = copy x
MI->removeOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+ return true;
}
}
More information about the llvm-commits
mailing list