[llvm] [AMDGPU] Improved Lowering of abs(i16) and -abs(i16) (PR #165626)

Patrick Simmons via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 3 13:06:20 PST 2025


https://github.com/linuxrocks123 updated https://github.com/llvm/llvm-project/pull/165626

>From 3e7c870183487e9084d29cfdc48c8b83ce3566c6 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 27 Oct 2025 17:39:50 -0500
Subject: [PATCH 1/8] This doesn't work.

---
 llvm/lib/Target/AMDGPU/SOPInstructions.td | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 84287b621fe78..5a37be3663ed6 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1894,6 +1894,11 @@ def : GCNPat <
   (S_MOV_B32 imm:$imm)
 >;
 
+def : GCNPat <
+  (i32 (UniformUnaryFrag<anyext> (i16 (UniformBinFrag<smax> i16:$src, (i16 (UniformBinFrag<sub> 0, i16:$src)))))),
+  (S_ABS_I32 (i32 (S_SEXT_I32_I16 $src)))
+>;
+
 def : GCNPat <
   (v2i32 (UniformBinFrag<and> v2i32:$x, v2i32:$y)),
   (S_AND_B64 SReg_64:$x, SReg_64:$y)

>From b83ae56e86356d5a47cff3878c1acdff2f6920de Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 27 Oct 2025 21:26:37 -0500
Subject: [PATCH 2/8] Finally something that works

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 ++--
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  1 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |  1 +
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 16 ++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  2 ++
 llvm/lib/Target/AMDGPU/SOPInstructions.td     |  5 -----
 6 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 64a7563182a98..fbc15d56a2826 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5557,8 +5557,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// \param N Node to expand
   /// \param IsNegative indicate negated abs
   /// \returns The expansion result or SDValue() if it fails.
-  SDValue expandABS(SDNode *N, SelectionDAG &DAG,
-                    bool IsNegative = false) const;
+  virtual SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+                            bool IsNegative = false) const;
 
   /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
   /// \param N Node to expand
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b8b419d93021a..b963b8f83070b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -23,6 +23,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 4fa0d3f72e1c7..6d8697834e536 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -282,6 +282,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   void SelectADD_SUB_I64(SDNode *N);
   void SelectAddcSubb(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
+  bool SelectABS(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
   void SelectMAD_64_32(SDNode *N);
   void SelectMUL_LOHI(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8ed4062e43946..c2b6a28cc9cc8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5286,6 +5286,22 @@ SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
   return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
 }
 
+SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
+                                        bool IsNegative) const {
+  assert(N->getOpcode() == ISD::ABS &&
+         "Tried to select abs with non-abs opcode.");
+
+  if (N->getValueSizeInBits(0) != 16 || IsNegative)
+    return TargetLowering::expandABS(N, CurDAG, IsNegative);
+
+  SDValue Src = N->getOperand(0);
+  SDLoc DL(Src);
+
+  SDValue SExtSrc = CurDAG.getSExtOrTrunc(Src, DL, MVT::i32);
+  SDValue ExtAbs = CurDAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
+  return CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+}
+
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bdaf48652d107..06327051667fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -135,6 +135,8 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  virtual SDValue expandABS(SDNode *N, SelectionDAG &CurDAG,
+                            bool IsNegative) const override;
 
   static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
 
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 5a37be3663ed6..84287b621fe78 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1894,11 +1894,6 @@ def : GCNPat <
   (S_MOV_B32 imm:$imm)
 >;
 
-def : GCNPat <
-  (i32 (UniformUnaryFrag<anyext> (i16 (UniformBinFrag<smax> i16:$src, (i16 (UniformBinFrag<sub> 0, i16:$src)))))),
-  (S_ABS_I32 (i32 (S_SEXT_I32_I16 $src)))
->;
-
 def : GCNPat <
   (v2i32 (UniformBinFrag<and> v2i32:$x, v2i32:$y)),
   (S_AND_B64 SReg_64:$x, SReg_64:$y)

>From fad716ce6bb9a0c19cc2117fce2e438c1dd6f19a Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 28 Oct 2025 17:25:42 -0500
Subject: [PATCH 3/8] This doesn't work.

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 +++++++--
 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll         | 22 +++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index c2b6a28cc9cc8..34fa65883970f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -18,6 +18,7 @@
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUMemoryUtils.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -5291,7 +5292,7 @@ SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
   assert(N->getOpcode() == ISD::ABS &&
          "Tried to select abs with non-abs opcode.");
 
-  if (N->getValueSizeInBits(0) != 16 || IsNegative)
+  if (N->getValueSizeInBits(0) != 16 || getRegClassFor(N->getSimpleValueType(0)) != &AMDGPU::SReg_32RegClass)
     return TargetLowering::expandABS(N, CurDAG, IsNegative);
 
   SDValue Src = N->getOperand(0);
@@ -5299,7 +5300,12 @@ SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
 
   SDValue SExtSrc = CurDAG.getSExtOrTrunc(Src, DL, MVT::i32);
   SDValue ExtAbs = CurDAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
-  return CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+  SDValue TruncResult = CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+  
+  if (!IsNegative)
+    return TruncResult;
+  return CurDAG.getNode(ISD::SUB, DL, MVT::i16,
+                        CurDAG.getConstant(0, DL, MVT::i16), TruncResult);
 }
 
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
diff --git a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
new file mode 100644
index 0000000000000..e61abb7173d78
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
+
+define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
+; CHECK-LABEL: abs_i16:
+; CHECK: %bb.0:
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_abs_i32 s0, s0
+
+  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  ret i16 %res
+}
+
+define amdgpu_ps i16 @abs_i16_neg(i16 inreg %arg) {
+; CHECK-LABEL: abs_i16_neg:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_abs_i32 s0, s0
+; CHECK-NEXT: s_sub_i32 s0, 0, s0
+  %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  %res2 = sub i16 0, %res1
+  ret i16 %res2
+}
\ No newline at end of file

>From 8def1a2829766a03a4213d3f0a1571b8b691042f Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 29 Oct 2025 12:55:07 -0500
Subject: [PATCH 4/8] Revert to master

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 ++--
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  1 -
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |  1 -
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 22 -------------------
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  2 --
 5 files changed, 2 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fbc15d56a2826..64a7563182a98 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5557,8 +5557,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// \param N Node to expand
   /// \param IsNegative indicate negated abs
   /// \returns The expansion result or SDValue() if it fails.
-  virtual SDValue expandABS(SDNode *N, SelectionDAG &DAG,
-                            bool IsNegative = false) const;
+  SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+                    bool IsNegative = false) const;
 
   /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
   /// \param N Node to expand
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b963b8f83070b..b8b419d93021a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -23,7 +23,6 @@
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 6d8697834e536..4fa0d3f72e1c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -282,7 +282,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   void SelectADD_SUB_I64(SDNode *N);
   void SelectAddcSubb(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
-  bool SelectABS(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
   void SelectMAD_64_32(SDNode *N);
   void SelectMUL_LOHI(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 34fa65883970f..8ed4062e43946 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -18,7 +18,6 @@
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUMemoryUtils.h"
 #include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -5287,27 +5286,6 @@ SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
   return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
 }
 
-SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
-                                        bool IsNegative) const {
-  assert(N->getOpcode() == ISD::ABS &&
-         "Tried to select abs with non-abs opcode.");
-
-  if (N->getValueSizeInBits(0) != 16 || getRegClassFor(N->getSimpleValueType(0)) != &AMDGPU::SReg_32RegClass)
-    return TargetLowering::expandABS(N, CurDAG, IsNegative);
-
-  SDValue Src = N->getOperand(0);
-  SDLoc DL(Src);
-
-  SDValue SExtSrc = CurDAG.getSExtOrTrunc(Src, DL, MVT::i32);
-  SDValue ExtAbs = CurDAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
-  SDValue TruncResult = CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
-  
-  if (!IsNegative)
-    return TruncResult;
-  return CurDAG.getNode(ISD::SUB, DL, MVT::i16,
-                        CurDAG.getConstant(0, DL, MVT::i16), TruncResult);
-}
-
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 06327051667fe..bdaf48652d107 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -135,8 +135,6 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-  virtual SDValue expandABS(SDNode *N, SelectionDAG &CurDAG,
-                            bool IsNegative) const override;
 
   static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
 

>From 6507e0a15a4d29a8644d18e35b3eae6d4e12a47e Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 29 Oct 2025 16:18:19 -0500
Subject: [PATCH 5/8] Machine-Level Implementation

---
 llvm/lib/Target/AMDGPU/AMDGPU.h                | 11 +++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +++
 llvm/lib/Target/AMDGPU/CMakeLists.txt          |  1 +
 3 files changed, 15 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index ce2b4a5f6f2e9..43a052b687109 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -39,6 +39,7 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
 FunctionPass *createSIFoldOperandsLegacyPass();
 FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
+FunctionPass *createSISAbs16FixupLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
 FunctionPass *createSIWholeQuadModeLegacyPass();
@@ -93,6 +94,13 @@ class SILowerI1CopiesPass : public PassInfoMixin<SILowerI1CopiesPass> {
                         MachineFunctionAnalysisManager &MFAM);
 };
 
+class SISAbs16FixupPass : public PassInfoMixin<SISAbs16FixupPass> {
+public:
+  SISAbs16FixupPass() = default;
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
 
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
@@ -197,6 +205,9 @@ extern char &SILowerWWMCopiesLegacyID;
 void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
 extern char &SILowerI1CopiesLegacyID;
 
+void initializeSISAbs16FixupLegacyPass(PassRegistry &);
+extern char &SISAbs16FixupLegacyID;
+
 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
 extern char &AMDGPUGlobalISelDivergenceLoweringID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 996b55f42fd0b..90405fed8efdd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -551,6 +551,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR);
   initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
+  initializeSISAbs16FixupLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeAMDGPURegBankSelectPass(*PR);
   initializeAMDGPURegBankLegalizePass(*PR);
@@ -1517,6 +1518,7 @@ bool GCNPassConfig::addInstSelector() {
   AMDGPUPassConfig::addInstSelector();
   addPass(&SIFixSGPRCopiesLegacyID);
   addPass(createSILowerI1CopiesLegacyPass());
+  addPass(createSISAbs16FixupLegacyPass());
   return false;
 }
 
@@ -2209,6 +2211,7 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
   addPass(AMDGPUISelDAGToDAGPass(TM));
   addPass(SIFixSGPRCopiesPass());
   addPass(SILowerI1CopiesPass());
+  addPass(SISAbs16FixupPass());
   return Error::success();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index a1e0e5293c706..cd9225acdb002 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen
   SIPreEmitPeephole.cpp
   SIProgramInfo.cpp
   SIRegisterInfo.cpp
+  SISAbs16Fixup.cpp
   SIShrinkInstructions.cpp
   SIWholeQuadMode.cpp
 

>From 732015f864222e202bb87a0f3092f3ac2f45a6f3 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Thu, 30 Oct 2025 02:44:22 -0500
Subject: [PATCH 6/8] Add new file

---
 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp | 168 +++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp

diff --git a/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
new file mode 100644
index 0000000000000..fd305b6ffc061
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
@@ -0,0 +1,168 @@
+//===-- SISAbs16Fixup.cpp - Lower I1 Copies -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass matches the pattern for 16-bit ABS instructions after they have
+// been lowered to for execution on the Scalar Unit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "si-abs16-pattern"
+
+using namespace llvm;
+
+static Register pierceCopies(Register R, MachineRegisterInfo& MRI) {
+  MachineInstr *CopyMI = MRI.getVRegDef(R);
+  while (CopyMI && CopyMI->getOpcode() == AMDGPU::COPY) {
+    Register T = CopyMI->getOperand(1).getReg();
+    if (!T.isVirtual())
+      break;
+
+    R = T;
+    CopyMI = MRI.getVRegDef(R);
+  }
+
+  return R;
+}
+
+static MachineInstr *matchExpandAbsPattern(MachineInstr &MI,
+                                           MachineRegisterInfo &MRI) {
+  std::array<MachineInstr *, 2> SextInstructions;
+  for (unsigned I = 0; I < SextInstructions.size(); I++)
+  {
+    SextInstructions[I] = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
+    if (SextInstructions[I]->getOpcode() != AMDGPU::S_SEXT_I32_I16)
+      return nullptr;
+  }
+
+  Register AbsSource;
+  MachineInstr* SubIns = nullptr;
+  for (MachineInstr *SextMI : SextInstructions) {
+    Register SextReg = SextMI->getOperand(1).getReg();
+    MachineInstr* OperandMI = MRI.getVRegDef(SextReg);
+    if (OperandMI->getOpcode() == AMDGPU::S_SUB_I32)
+      if(!SubIns)
+        SubIns = OperandMI;
+      else
+        return nullptr;
+    else
+      AbsSource = pierceCopies(SextReg,MRI);
+  }
+
+  if (!SubIns)
+    return nullptr;
+
+  if (MRI.getRegClass(AbsSource) != &AMDGPU::SGPR_32RegClass)
+    return nullptr;
+
+  MachineInstr &MustBeZero =
+      *MRI.getVRegDef(pierceCopies(SubIns->getOperand(1).getReg(), MRI));
+  if (MustBeZero.getOpcode() != AMDGPU::S_MOV_B32 ||
+      MustBeZero.getOperand(1).getImm())
+    return nullptr;
+
+  if (pierceCopies(SubIns->getOperand(2).getReg(), MRI) != AbsSource)
+    return nullptr;
+
+  return MRI.getVRegDef(AbsSource);
+}
+
+static bool runSAbs16Fixup(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+  bool Changed = false;
+  
+  for (MachineBasicBlock &MBB : MF)
+    for (MachineInstr &MI : make_early_inc_range(MBB)) {
+      bool IsPositive = MI.getOpcode() == AMDGPU::S_MAX_I32;
+      bool IsNegative = MI.getOpcode() == AMDGPU::S_MIN_I32;
+      MachineInstr* AbsSourceMI;
+      if ((!IsPositive && !IsNegative) ||
+          !(AbsSourceMI = matchExpandAbsPattern(MI, MRI)))
+        continue;
+
+      Register SextDestReg =
+          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      Register AbsDestReg =
+          IsNegative ? MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass)
+                     : MI.getOperand(0).getReg();
+
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SEXT_I32_I16),
+              SextDestReg)
+          .addReg(AbsSourceMI->getOperand(0).getReg());
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_ABS_I32), AbsDestReg)
+          .addReg(SextDestReg);
+
+      if(IsNegative)
+        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SUB_I32),
+                MI.getOperand(0).getReg())
+            .addImm(0)
+            .addReg(AbsDestReg);
+
+      MI.eraseFromParent();
+      Changed = true;
+    }
+
+  return Changed;
+}
+
+PreservedAnalyses SISAbs16FixupPass::run(MachineFunction &MF,
+                                         MachineFunctionAnalysisManager &MFAM) {
+  bool Changed = runSAbs16Fixup(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  // TODO: Probably preserves most.
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+class SISAbs16FixupLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  SISAbs16FixupLegacy() : MachineFunctionPass(ID) {
+    initializeSISAbs16FixupLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return "SI SAbs16 Fixup"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+bool SISAbs16FixupLegacy::runOnMachineFunction(MachineFunction &MF) {
+  return runSAbs16Fixup(MF);
+}
+
+INITIALIZE_PASS_BEGIN(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
+                      false, false)
+INITIALIZE_PASS_END(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
+                    false, false)
+
+char SISAbs16FixupLegacy::ID = 0;
+
+char &llvm::SISAbs16FixupLegacyID = SISAbs16FixupLegacy::ID;
+
+FunctionPass *llvm::createSISAbs16FixupLegacyPass() {
+  return new SISAbs16FixupLegacy();
+}

>From c9b86830583e1fed9f15bce30504bbc576850486 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Fri, 31 Oct 2025 14:29:42 -0500
Subject: [PATCH 7/8] Run update_llc_test_checks.py

---
 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
index e61abb7173d78..0cdbedd837396 100644
--- a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
 
 define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
 ; CHECK-LABEL: abs_i16:
-; CHECK: %bb.0:
-; CHECK-NEXT: s_sext_i32_i16 s0, s0
-; CHECK-NEXT: s_abs_i32 s0, s0
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_sext_i32_i16 s0, s0
+; CHECK-NEXT:    s_abs_i32 s0, s0
+; CHECK-NEXT:    ; return to shader part epilog
 
   %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
   ret i16 %res
@@ -12,11 +15,12 @@ define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
 
 define amdgpu_ps i16 @abs_i16_neg(i16 inreg %arg) {
 ; CHECK-LABEL: abs_i16_neg:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_sext_i32_i16 s0, s0
-; CHECK-NEXT: s_abs_i32 s0, s0
-; CHECK-NEXT: s_sub_i32 s0, 0, s0
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_sext_i32_i16 s0, s0
+; CHECK-NEXT:    s_abs_i32 s0, s0
+; CHECK-NEXT:    s_sub_i32 s0, 0, s0
+; CHECK-NEXT:    ; return to shader part epilog
   %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
   %res2 = sub i16 0, %res1
   ret i16 %res2
-}
\ No newline at end of file
+}

>From 14d4d7c69029f586bc7f9b590dc343b862c2421b Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 3 Nov 2025 16:05:57 -0500
Subject: [PATCH 8/8] Attempt #4, with DAG again

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  11 --
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   3 -
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |   1 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  26 +++
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |   1 +
 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp      | 168 ------------------
 6 files changed, 27 insertions(+), 183 deletions(-)
 delete mode 100644 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 43a052b687109..ce2b4a5f6f2e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -39,7 +39,6 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
 FunctionPass *createSIFoldOperandsLegacyPass();
 FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
-FunctionPass *createSISAbs16FixupLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
 FunctionPass *createSIWholeQuadModeLegacyPass();
@@ -94,13 +93,6 @@ class SILowerI1CopiesPass : public PassInfoMixin<SILowerI1CopiesPass> {
                         MachineFunctionAnalysisManager &MFAM);
 };
 
-class SISAbs16FixupPass : public PassInfoMixin<SISAbs16FixupPass> {
-public:
-  SISAbs16FixupPass() = default;
-  PreservedAnalyses run(MachineFunction &MF,
-                        MachineFunctionAnalysisManager &MFAM);
-};
-
 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
 
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
@@ -205,9 +197,6 @@ extern char &SILowerWWMCopiesLegacyID;
 void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
 extern char &SILowerI1CopiesLegacyID;
 
-void initializeSISAbs16FixupLegacyPass(PassRegistry &);
-extern char &SISAbs16FixupLegacyID;
-
 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
 extern char &AMDGPUGlobalISelDivergenceLoweringID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 90405fed8efdd..996b55f42fd0b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -551,7 +551,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR);
   initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
-  initializeSISAbs16FixupLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeAMDGPURegBankSelectPass(*PR);
   initializeAMDGPURegBankLegalizePass(*PR);
@@ -1518,7 +1517,6 @@ bool GCNPassConfig::addInstSelector() {
   AMDGPUPassConfig::addInstSelector();
   addPass(&SIFixSGPRCopiesLegacyID);
   addPass(createSILowerI1CopiesLegacyPass());
-  addPass(createSISAbs16FixupLegacyPass());
   return false;
 }
 
@@ -2211,7 +2209,6 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
   addPass(AMDGPUISelDAGToDAGPass(TM));
   addPass(SIFixSGPRCopiesPass());
   addPass(SILowerI1CopiesPass());
-  addPass(SISAbs16FixupPass());
   return Error::success();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index cd9225acdb002..a1e0e5293c706 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,7 +185,6 @@ add_llvm_target(AMDGPUCodeGen
   SIPreEmitPeephole.cpp
   SIProgramInfo.cpp
   SIRegisterInfo.cpp
-  SISAbs16Fixup.cpp
   SIShrinkInstructions.cpp
   SIWholeQuadMode.cpp
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a7574213c2907..2406b7c627f59 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -177,6 +177,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
     addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
     addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
+
+    // We don't want the default expansion of 16-bit ABS since we can
+    // sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
+    setOperationAction(ISD::ABS, MVT::i16, Custom);
   }
 
   addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -6774,6 +6778,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::DEBUGTRAP:
     return lowerDEBUGTRAP(Op, DAG);
   case ISD::ABS:
+    if (Op.getValueType() == MVT::i16) // Scalar i16 has a dedicated lowering.
+      return lowerABSi16(Op, DAG);
+    [[fallthrough]]; // Other ABS types share the handling of the cases below.
   case ISD::FABS:
   case ISD::FNEG:
   case ISD::FCANONICALIZE:
@@ -8133,6 +8140,25 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
 }
 
+/// Lower a uniform i16 ABS via the 32-bit ABS operation on the sign-extended
+/// source. Returns an empty SDValue for divergent nodes.
+SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::ABS &&
+         "Tried to select abs with non-abs opcode.");
+  assert(Op.getValueType() == MVT::i16 &&
+         "Tried to select abs i16 lowering with non-i16 type.");
+
+  // A divergent node will not end up using SGPRs, so do not widen it here.
+  if (Op->isDivergent())
+    return SDValue();
+
+  // (abs i16 x) -> (trunc i16 (abs i32 (sext i32 x))); new nodes use Op's loc.
+  SDLoc DL(Op);
+  SDValue SExtSrc = DAG.getSExtOrTrunc(Op.getOperand(0), DL, MVT::i32);
+  SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+}
+
 SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
                                              SelectionDAG &DAG) const {
   if (Subtarget->hasApertureRegs()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 74e58f4272e10..25e94851c24df 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -184,6 +184,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerABSi16(SDValue Op, SelectionDAG &DAG) const;
 
   SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
deleted file mode 100644
index fd305b6ffc061..0000000000000
--- a/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-//===-- SISAbs16Fixup.cpp - Lower I1 Copies -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass matches the pattern for 16-bit ABS instructions after they have
-// been lowered to for execution on the Scalar Unit.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineSSAUpdater.h"
-#include "llvm/InitializePasses.h"
-
-#define DEBUG_TYPE "si-abs16-pattern"
-
-using namespace llvm;
-
-static Register pierceCopies(Register R, MachineRegisterInfo& MRI) {
-  MachineInstr *CopyMI = MRI.getVRegDef(R);
-  while (CopyMI && CopyMI->getOpcode() == AMDGPU::COPY) {
-    Register T = CopyMI->getOperand(1).getReg();
-    if (!T.isVirtual())
-      break;
-
-    R = T;
-    CopyMI = MRI.getVRegDef(R);
-  }
-
-  return R;
-}
-
-static MachineInstr *matchExpandAbsPattern(MachineInstr &MI,
-                                           MachineRegisterInfo &MRI) {
-  std::array<MachineInstr *, 2> SextInstructions;
-  for (unsigned I = 0; I < SextInstructions.size(); I++)
-  {
-    SextInstructions[I] = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
-    if (SextInstructions[I]->getOpcode() != AMDGPU::S_SEXT_I32_I16)
-      return nullptr;
-  }
-
-  Register AbsSource;
-  MachineInstr* SubIns = nullptr;
-  for (MachineInstr *SextMI : SextInstructions) {
-    Register SextReg = SextMI->getOperand(1).getReg();
-    MachineInstr* OperandMI = MRI.getVRegDef(SextReg);
-    if (OperandMI->getOpcode() == AMDGPU::S_SUB_I32)
-      if(!SubIns)
-        SubIns = OperandMI;
-      else
-        return nullptr;
-    else
-      AbsSource = pierceCopies(SextReg,MRI);
-  }
-
-  if (!SubIns)
-    return nullptr;
-
-  if (MRI.getRegClass(AbsSource) != &AMDGPU::SGPR_32RegClass)
-    return nullptr;
-
-  MachineInstr &MustBeZero =
-      *MRI.getVRegDef(pierceCopies(SubIns->getOperand(1).getReg(), MRI));
-  if (MustBeZero.getOpcode() != AMDGPU::S_MOV_B32 ||
-      MustBeZero.getOperand(1).getImm())
-    return nullptr;
-
-  if (pierceCopies(SubIns->getOperand(2).getReg(), MRI) != AbsSource)
-    return nullptr;
-
-  return MRI.getVRegDef(AbsSource);
-}
-
-static bool runSAbs16Fixup(MachineFunction &MF) {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
-
-  bool Changed = false;
-  
-  for (MachineBasicBlock &MBB : MF)
-    for (MachineInstr &MI : make_early_inc_range(MBB)) {
-      bool IsPositive = MI.getOpcode() == AMDGPU::S_MAX_I32;
-      bool IsNegative = MI.getOpcode() == AMDGPU::S_MIN_I32;
-      MachineInstr* AbsSourceMI;
-      if ((!IsPositive && !IsNegative) ||
-          !(AbsSourceMI = matchExpandAbsPattern(MI, MRI)))
-        continue;
-
-      Register SextDestReg =
-          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      Register AbsDestReg =
-          IsNegative ? MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass)
-                     : MI.getOperand(0).getReg();
-
-      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SEXT_I32_I16),
-              SextDestReg)
-          .addReg(AbsSourceMI->getOperand(0).getReg());
-      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_ABS_I32), AbsDestReg)
-          .addReg(SextDestReg);
-
-      if(IsNegative)
-        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SUB_I32),
-                MI.getOperand(0).getReg())
-            .addImm(0)
-            .addReg(AbsDestReg);
-
-      MI.eraseFromParent();
-      Changed = true;
-    }
-
-  return Changed;
-}
-
-PreservedAnalyses SISAbs16FixupPass::run(MachineFunction &MF,
-                                         MachineFunctionAnalysisManager &MFAM) {
-  bool Changed = runSAbs16Fixup(MF);
-  if (!Changed)
-    return PreservedAnalyses::all();
-
-  // TODO: Probably preserves most.
-  PreservedAnalyses PA;
-  PA.preserveSet<CFGAnalyses>();
-  return PA;
-}
-
-class SISAbs16FixupLegacy : public MachineFunctionPass {
-public:
-  static char ID;
-
-  SISAbs16FixupLegacy() : MachineFunctionPass(ID) {
-    initializeSISAbs16FixupLegacyPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  StringRef getPassName() const override { return "SI SAbs16 Fixup"; }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-};
-
-bool SISAbs16FixupLegacy::runOnMachineFunction(MachineFunction &MF) {
-  return runSAbs16Fixup(MF);
-}
-
-INITIALIZE_PASS_BEGIN(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
-                      false, false)
-INITIALIZE_PASS_END(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
-                    false, false)
-
-char SISAbs16FixupLegacy::ID = 0;
-
-char &llvm::SISAbs16FixupLegacyID = SISAbs16FixupLegacy::ID;
-
-FunctionPass *llvm::createSISAbs16FixupLegacyPass() {
-  return new SISAbs16FixupLegacy();
-}



More information about the llvm-commits mailing list