[llvm] true16 for fold clamp (PR #128919)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 09:32:19 PST 2025
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/128919
(No description was provided for this pull request.)
>From cec89d2e2553192c70d863d332fee768aaa96501 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 26 Feb 2025 12:31:48 -0500
Subject: [PATCH] true16 for fold clamp
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index dbed042776e2c..38c5f20605930 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1684,6 +1684,10 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
return false;
MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
+ MachineInstr *OrigDef = Def;
+ // Look through COPY. COPY only observed with True16.
+ if (Def->isCopy() && Def->getOperand(1).getReg().isVirtual())
+ Def = MRI->getVRegDef(Def->getOperand(1).getReg());
// The type of clamp must be compatible.
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
@@ -1701,7 +1705,7 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
// Clamp is applied after omod, so it is OK if omod is set.
DefClamp->setImm(1);
- Register DefReg = Def->getOperand(0).getReg();
+ Register DefReg = OrigDef->getOperand(0).getReg();
Register MIDstReg = MI.getOperand(0).getReg();
if (TRI->isSGPRReg(*MRI, DefReg)) {
// Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*
More information about the llvm-commits mailing list