[llvm] true16 for fold clamp (PR #128919)

Brox Chen via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 09:32:19 PST 2025


https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/128919

None

>From cec89d2e2553192c70d863d332fee768aaa96501 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 26 Feb 2025 12:31:48 -0500
Subject: [PATCH] true16 for fold clamp

---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index dbed042776e2c..38c5f20605930 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1684,6 +1684,10 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
     return false;
 
   MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
+  MachineInstr *OrigDef = Def;
+  // Look through COPY. COPY only observed with True16.
+  if (Def->isCopy() && Def->getOperand(1).getReg().isVirtual())
+    Def = MRI->getVRegDef(Def->getOperand(1).getReg());
 
   // The type of clamp must be compatible.
   if (TII->getClampMask(*Def) != TII->getClampMask(MI))
@@ -1701,7 +1705,7 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
   // Clamp is applied after omod, so it is OK if omod is set.
   DefClamp->setImm(1);
 
-  Register DefReg = Def->getOperand(0).getReg();
+  Register DefReg = OrigDef->getOperand(0).getReg();
   Register MIDstReg = MI.getOperand(0).getReg();
   if (TRI->isSGPRReg(*MRI, DefReg)) {
     // Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*



More information about the llvm-commits mailing list