[llvm] [AMDGPU] Improved Lowering of abs(i8/i16) and -abs(i8/i16) (PR #165626)

Patrick Simmons via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 14 13:34:34 PST 2025


https://github.com/linuxrocks123 updated https://github.com/llvm/llvm-project/pull/165626

>From eb300b52173443d1594a7d275b406e4832345233 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 27 Oct 2025 17:39:50 -0500
Subject: [PATCH 01/21] This doesn't work.

---
 llvm/lib/Target/AMDGPU/SOPInstructions.td | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1931e0be15152..731dfece8f3fe 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1895,6 +1895,11 @@ def : GCNPat <
   (S_MOV_B32 imm:$imm)
 >;
 
+def : GCNPat <
+  (i32 (UniformUnaryFrag<anyext> (i16 (UniformBinFrag<smax> i16:$src, (i16 (UniformBinFrag<sub> 0, i16:$src)))))),
+  (S_ABS_I32 (i32 (S_SEXT_I32_I16 $src)))
+>;
+
 def : GCNPat <
   (v2i32 (UniformBinFrag<and> v2i32:$x, v2i32:$y)),
   (S_AND_B64 SReg_64:$x, SReg_64:$y)

>From cb08bca47d6bf0a2dddce8cb9589047366a782f7 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 27 Oct 2025 21:26:37 -0500
Subject: [PATCH 02/21] Finally something that works

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 ++--
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  1 +
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |  1 +
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 16 ++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  2 ++
 llvm/lib/Target/AMDGPU/SOPInstructions.td     |  5 -----
 6 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4d5d1fc7dfadc..a3c19c6777790 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5575,8 +5575,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// \param N Node to expand
   /// \param IsNegative indicate negated abs
   /// \returns The expansion result or SDValue() if it fails.
-  SDValue expandABS(SDNode *N, SelectionDAG &DAG,
-                    bool IsNegative = false) const;
+  virtual SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+                            bool IsNegative = false) const;
 
   /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
   /// \param N Node to expand
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b8b419d93021a..b963b8f83070b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -23,6 +23,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 4fa0d3f72e1c7..6d8697834e536 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -282,6 +282,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   void SelectADD_SUB_I64(SDNode *N);
   void SelectAddcSubb(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
+  bool SelectABS(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
   void SelectMAD_64_32(SDNode *N);
   void SelectMUL_LOHI(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f5081a9d2dd56..ef8b7e0f7d323 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5287,6 +5287,22 @@ SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
   return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
 }
 
+SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
+                                        bool IsNegative) const {
+  assert(N->getOpcode() == ISD::ABS &&
+         "Tried to select abs with non-abs opcode.");
+
+  if (N->getValueSizeInBits(0) != 16 || IsNegative)
+    return TargetLowering::expandABS(N, CurDAG, IsNegative);
+
+  SDValue Src = N->getOperand(0);
+  SDLoc DL(Src);
+
+  SDValue SExtSrc = CurDAG.getSExtOrTrunc(Src, DL, MVT::i32);
+  SDValue ExtAbs = CurDAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
+  return CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+}
+
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bdaf48652d107..06327051667fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -135,6 +135,8 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  virtual SDValue expandABS(SDNode *N, SelectionDAG &CurDAG,
+                            bool IsNegative) const override;
 
   static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
 
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 731dfece8f3fe..1931e0be15152 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1895,11 +1895,6 @@ def : GCNPat <
   (S_MOV_B32 imm:$imm)
 >;
 
-def : GCNPat <
-  (i32 (UniformUnaryFrag<anyext> (i16 (UniformBinFrag<smax> i16:$src, (i16 (UniformBinFrag<sub> 0, i16:$src)))))),
-  (S_ABS_I32 (i32 (S_SEXT_I32_I16 $src)))
->;
-
 def : GCNPat <
   (v2i32 (UniformBinFrag<and> v2i32:$x, v2i32:$y)),
   (S_AND_B64 SReg_64:$x, SReg_64:$y)

>From 25652bdfca2d94e7f2f4d15642c2997b0b6c0db3 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 28 Oct 2025 17:25:42 -0500
Subject: [PATCH 03/21] This doesn't work.

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 +++++++--
 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll         | 22 +++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ef8b7e0f7d323..a31dbf1d4a894 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -18,6 +18,7 @@
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUMemoryUtils.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -5292,7 +5293,7 @@ SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
   assert(N->getOpcode() == ISD::ABS &&
          "Tried to select abs with non-abs opcode.");
 
-  if (N->getValueSizeInBits(0) != 16 || IsNegative)
+  if (N->getValueSizeInBits(0) != 16 || getRegClassFor(N->getSimpleValueType(0)) != &AMDGPU::SReg_32RegClass)
     return TargetLowering::expandABS(N, CurDAG, IsNegative);
 
   SDValue Src = N->getOperand(0);
@@ -5300,7 +5301,12 @@ SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
 
   SDValue SExtSrc = CurDAG.getSExtOrTrunc(Src, DL, MVT::i32);
   SDValue ExtAbs = CurDAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
-  return CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+  SDValue TruncResult = CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+  
+  if (!IsNegative)
+    return TruncResult;
+  return CurDAG.getNode(ISD::SUB, DL, MVT::i16,
+                        CurDAG.getConstant(0, DL, MVT::i16), TruncResult);
 }
 
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
diff --git a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
new file mode 100644
index 0000000000000..e61abb7173d78
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
+
+define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
+; CHECK-LABEL: abs_i16:
+; CHECK: %bb.0:
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_abs_i32 s0, s0
+
+  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  ret i16 %res
+}
+
+define amdgpu_ps i16 @abs_i16_neg(i16 inreg %arg) {
+; CHECK-LABEL: abs_i16_neg:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_abs_i32 s0, s0
+; CHECK-NEXT: s_sub_i32 s0, 0, s0
+  %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  %res2 = sub i16 0, %res1
+  ret i16 %res2
+}
\ No newline at end of file

>From 0d151c6379b343cda071f9e1d8031bb8e7664590 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 29 Oct 2025 12:55:07 -0500
Subject: [PATCH 04/21] Revert to master

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 ++--
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  1 -
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h   |  1 -
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 22 -------------------
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  2 --
 5 files changed, 2 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a3c19c6777790..4d5d1fc7dfadc 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5575,8 +5575,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// \param N Node to expand
   /// \param IsNegative indicate negated abs
   /// \returns The expansion result or SDValue() if it fails.
-  virtual SDValue expandABS(SDNode *N, SelectionDAG &DAG,
-                            bool IsNegative = false) const;
+  SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+                    bool IsNegative = false) const;
 
   /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
   /// \param N Node to expand
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b963b8f83070b..b8b419d93021a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -23,7 +23,6 @@
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 6d8697834e536..4fa0d3f72e1c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -282,7 +282,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   void SelectADD_SUB_I64(SDNode *N);
   void SelectAddcSubb(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
-  bool SelectABS(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
   void SelectMAD_64_32(SDNode *N);
   void SelectMUL_LOHI(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a31dbf1d4a894..f5081a9d2dd56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -18,7 +18,6 @@
 #include "AMDGPUMachineFunction.h"
 #include "AMDGPUMemoryUtils.h"
 #include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -5288,27 +5287,6 @@ SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
   return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
 }
 
-SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &CurDAG,
-                                        bool IsNegative) const {
-  assert(N->getOpcode() == ISD::ABS &&
-         "Tried to select abs with non-abs opcode.");
-
-  if (N->getValueSizeInBits(0) != 16 || getRegClassFor(N->getSimpleValueType(0)) != &AMDGPU::SReg_32RegClass)
-    return TargetLowering::expandABS(N, CurDAG, IsNegative);
-
-  SDValue Src = N->getOperand(0);
-  SDLoc DL(Src);
-
-  SDValue SExtSrc = CurDAG.getSExtOrTrunc(Src, DL, MVT::i32);
-  SDValue ExtAbs = CurDAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
-  SDValue TruncResult = CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
-  
-  if (!IsNegative)
-    return TruncResult;
-  return CurDAG.getNode(ISD::SUB, DL, MVT::i16,
-                        CurDAG.getConstant(0, DL, MVT::i16), TruncResult);
-}
-
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 06327051667fe..bdaf48652d107 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -135,8 +135,6 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-  virtual SDValue expandABS(SDNode *N, SelectionDAG &CurDAG,
-                            bool IsNegative) const override;
 
   static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
 

>From 67e5633dd4d8b782b22cd71097b7477f7de70d9c Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 29 Oct 2025 16:18:19 -0500
Subject: [PATCH 05/21] Machine-Level Implementation

---
 llvm/lib/Target/AMDGPU/AMDGPU.h                | 11 +++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +++
 llvm/lib/Target/AMDGPU/CMakeLists.txt          |  1 +
 3 files changed, 15 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 67042b700c047..f7e46430d658f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -39,6 +39,7 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
 FunctionPass *createSIFoldOperandsLegacyPass();
 FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
+FunctionPass *createSISAbs16FixupLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
 FunctionPass *createSIWholeQuadModeLegacyPass();
@@ -93,6 +94,13 @@ class SILowerI1CopiesPass : public PassInfoMixin<SILowerI1CopiesPass> {
                         MachineFunctionAnalysisManager &MFAM);
 };
 
+class SISAbs16FixupPass : public PassInfoMixin<SISAbs16FixupPass> {
+public:
+  SISAbs16FixupPass() = default;
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
 
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
@@ -197,6 +205,9 @@ extern char &SILowerWWMCopiesLegacyID;
 void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
 extern char &SILowerI1CopiesLegacyID;
 
+void initializeSISAbs16FixupLegacyPass(PassRegistry &);
+extern char &SISAbs16FixupLegacyID;
+
 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
 extern char &AMDGPUGlobalISelDivergenceLoweringID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b87b54ffc4f12..3065658f4d8f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -551,6 +551,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR);
   initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
+  initializeSISAbs16FixupLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeAMDGPURegBankSelectPass(*PR);
   initializeAMDGPURegBankLegalizePass(*PR);
@@ -1521,6 +1522,7 @@ bool GCNPassConfig::addInstSelector() {
   AMDGPUPassConfig::addInstSelector();
   addPass(&SIFixSGPRCopiesLegacyID);
   addPass(createSILowerI1CopiesLegacyPass());
+  addPass(createSISAbs16FixupLegacyPass());
   return false;
 }
 
@@ -2215,6 +2217,7 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
   addPass(AMDGPUISelDAGToDAGPass(TM));
   addPass(SIFixSGPRCopiesPass());
   addPass(SILowerI1CopiesPass());
+  addPass(SISAbs16FixupPass());
   return Error::success();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index a1e0e5293c706..cd9225acdb002 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen
   SIPreEmitPeephole.cpp
   SIProgramInfo.cpp
   SIRegisterInfo.cpp
+  SISAbs16Fixup.cpp
   SIShrinkInstructions.cpp
   SIWholeQuadMode.cpp
 

>From fd6e54d11a88bdc18c6586fde874c45bdf6a7850 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Thu, 30 Oct 2025 02:44:22 -0500
Subject: [PATCH 06/21] Add new file

---
 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp | 168 +++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp

diff --git a/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
new file mode 100644
index 0000000000000..fd305b6ffc061
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
@@ -0,0 +1,168 @@
+//===-- SISAbs16Fixup.cpp - Lower I1 Copies -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass matches the pattern for 16-bit ABS instructions after they have
+// been lowered to for execution on the Scalar Unit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "si-abs16-pattern"
+
+using namespace llvm;
+
+static Register pierceCopies(Register R, MachineRegisterInfo& MRI) {
+  MachineInstr *CopyMI = MRI.getVRegDef(R);
+  while (CopyMI && CopyMI->getOpcode() == AMDGPU::COPY) {
+    Register T = CopyMI->getOperand(1).getReg();
+    if (!T.isVirtual())
+      break;
+
+    R = T;
+    CopyMI = MRI.getVRegDef(R);
+  }
+
+  return R;
+}
+
+static MachineInstr *matchExpandAbsPattern(MachineInstr &MI,
+                                           MachineRegisterInfo &MRI) {
+  std::array<MachineInstr *, 2> SextInstructions;
+  for (unsigned I = 0; I < SextInstructions.size(); I++)
+  {
+    SextInstructions[I] = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
+    if (SextInstructions[I]->getOpcode() != AMDGPU::S_SEXT_I32_I16)
+      return nullptr;
+  }
+
+  Register AbsSource;
+  MachineInstr* SubIns = nullptr;
+  for (MachineInstr *SextMI : SextInstructions) {
+    Register SextReg = SextMI->getOperand(1).getReg();
+    MachineInstr* OperandMI = MRI.getVRegDef(SextReg);
+    if (OperandMI->getOpcode() == AMDGPU::S_SUB_I32)
+      if(!SubIns)
+        SubIns = OperandMI;
+      else
+        return nullptr;
+    else
+      AbsSource = pierceCopies(SextReg,MRI);
+  }
+
+  if (!SubIns)
+    return nullptr;
+
+  if (MRI.getRegClass(AbsSource) != &AMDGPU::SGPR_32RegClass)
+    return nullptr;
+
+  MachineInstr &MustBeZero =
+      *MRI.getVRegDef(pierceCopies(SubIns->getOperand(1).getReg(), MRI));
+  if (MustBeZero.getOpcode() != AMDGPU::S_MOV_B32 ||
+      MustBeZero.getOperand(1).getImm())
+    return nullptr;
+
+  if (pierceCopies(SubIns->getOperand(2).getReg(), MRI) != AbsSource)
+    return nullptr;
+
+  return MRI.getVRegDef(AbsSource);
+}
+
+static bool runSAbs16Fixup(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+  bool Changed = false;
+  
+  for (MachineBasicBlock &MBB : MF)
+    for (MachineInstr &MI : make_early_inc_range(MBB)) {
+      bool IsPositive = MI.getOpcode() == AMDGPU::S_MAX_I32;
+      bool IsNegative = MI.getOpcode() == AMDGPU::S_MIN_I32;
+      MachineInstr* AbsSourceMI;
+      if ((!IsPositive && !IsNegative) ||
+          !(AbsSourceMI = matchExpandAbsPattern(MI, MRI)))
+        continue;
+
+      Register SextDestReg =
+          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      Register AbsDestReg =
+          IsNegative ? MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass)
+                     : MI.getOperand(0).getReg();
+
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SEXT_I32_I16),
+              SextDestReg)
+          .addReg(AbsSourceMI->getOperand(0).getReg());
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_ABS_I32), AbsDestReg)
+          .addReg(SextDestReg);
+
+      if(IsNegative)
+        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SUB_I32),
+                MI.getOperand(0).getReg())
+            .addImm(0)
+            .addReg(AbsDestReg);
+
+      MI.eraseFromParent();
+      Changed = true;
+    }
+
+  return Changed;
+}
+
+PreservedAnalyses SISAbs16FixupPass::run(MachineFunction &MF,
+                                         MachineFunctionAnalysisManager &MFAM) {
+  bool Changed = runSAbs16Fixup(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  // TODO: Probably preserves most.
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+class SISAbs16FixupLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  SISAbs16FixupLegacy() : MachineFunctionPass(ID) {
+    initializeSISAbs16FixupLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return "SI SAbs16 Fixup"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+bool SISAbs16FixupLegacy::runOnMachineFunction(MachineFunction &MF) {
+  return runSAbs16Fixup(MF);
+}
+
+INITIALIZE_PASS_BEGIN(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
+                      false, false)
+INITIALIZE_PASS_END(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
+                    false, false)
+
+char SISAbs16FixupLegacy::ID = 0;
+
+char &llvm::SISAbs16FixupLegacyID = SISAbs16FixupLegacy::ID;
+
+FunctionPass *llvm::createSISAbs16FixupLegacyPass() {
+  return new SISAbs16FixupLegacy();
+}

>From 789d94ea33d2d4854d95214a52141c5f07614305 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Fri, 31 Oct 2025 14:29:42 -0500
Subject: [PATCH 07/21] Run update_llc_test_checks.py

---
 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
index e61abb7173d78..0cdbedd837396 100644
--- a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
 
 define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
 ; CHECK-LABEL: abs_i16:
-; CHECK: %bb.0:
-; CHECK-NEXT: s_sext_i32_i16 s0, s0
-; CHECK-NEXT: s_abs_i32 s0, s0
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_sext_i32_i16 s0, s0
+; CHECK-NEXT:    s_abs_i32 s0, s0
+; CHECK-NEXT:    ; return to shader part epilog
 
   %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
   ret i16 %res
@@ -12,11 +15,12 @@ define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
 
 define amdgpu_ps i16 @abs_i16_neg(i16 inreg %arg) {
 ; CHECK-LABEL: abs_i16_neg:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_sext_i32_i16 s0, s0
-; CHECK-NEXT: s_abs_i32 s0, s0
-; CHECK-NEXT: s_sub_i32 s0, 0, s0
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_sext_i32_i16 s0, s0
+; CHECK-NEXT:    s_abs_i32 s0, s0
+; CHECK-NEXT:    s_sub_i32 s0, 0, s0
+; CHECK-NEXT:    ; return to shader part epilog
   %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
   %res2 = sub i16 0, %res1
   ret i16 %res2
-}
\ No newline at end of file
+}

>From 7eb02503cbe694cf3e4e808743a6fc852fdb2a25 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 3 Nov 2025 16:05:57 -0500
Subject: [PATCH 08/21] Attempt #4, with DAG again

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  11 --
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   3 -
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |   1 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  26 +++
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |   1 +
 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp      | 168 ------------------
 6 files changed, 27 insertions(+), 183 deletions(-)
 delete mode 100644 llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f7e46430d658f..67042b700c047 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -39,7 +39,6 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
 FunctionPass *createSIFoldOperandsLegacyPass();
 FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
-FunctionPass *createSISAbs16FixupLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
 FunctionPass *createSIWholeQuadModeLegacyPass();
@@ -94,13 +93,6 @@ class SILowerI1CopiesPass : public PassInfoMixin<SILowerI1CopiesPass> {
                         MachineFunctionAnalysisManager &MFAM);
 };
 
-class SISAbs16FixupPass : public PassInfoMixin<SISAbs16FixupPass> {
-public:
-  SISAbs16FixupPass() = default;
-  PreservedAnalyses run(MachineFunction &MF,
-                        MachineFunctionAnalysisManager &MFAM);
-};
-
 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
 
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
@@ -205,9 +197,6 @@ extern char &SILowerWWMCopiesLegacyID;
 void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
 extern char &SILowerI1CopiesLegacyID;
 
-void initializeSISAbs16FixupLegacyPass(PassRegistry &);
-extern char &SISAbs16FixupLegacyID;
-
 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
 extern char &AMDGPUGlobalISelDivergenceLoweringID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 3065658f4d8f3..b87b54ffc4f12 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -551,7 +551,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR);
   initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
-  initializeSISAbs16FixupLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeAMDGPURegBankSelectPass(*PR);
   initializeAMDGPURegBankLegalizePass(*PR);
@@ -1522,7 +1521,6 @@ bool GCNPassConfig::addInstSelector() {
   AMDGPUPassConfig::addInstSelector();
   addPass(&SIFixSGPRCopiesLegacyID);
   addPass(createSILowerI1CopiesLegacyPass());
-  addPass(createSISAbs16FixupLegacyPass());
   return false;
 }
 
@@ -2217,7 +2215,6 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
   addPass(AMDGPUISelDAGToDAGPass(TM));
   addPass(SIFixSGPRCopiesPass());
   addPass(SILowerI1CopiesPass());
-  addPass(SISAbs16FixupPass());
   return Error::success();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index cd9225acdb002..a1e0e5293c706 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,7 +185,6 @@ add_llvm_target(AMDGPUCodeGen
   SIPreEmitPeephole.cpp
   SIProgramInfo.cpp
   SIRegisterInfo.cpp
-  SISAbs16Fixup.cpp
   SIShrinkInstructions.cpp
   SIWholeQuadMode.cpp
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 768c0abd2e3f1..99baac991a128 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -177,6 +177,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
     addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
     addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
+
+    // Custom-lower 16-bit ABS instead of using the default expansion: for
+    // SGPR operands we can sign-extend and use the 32-bit ABS operation.
+    setOperationAction(ISD::ABS, MVT::i16, Custom);
   }
 
   addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -6774,6 +6778,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::DEBUGTRAP:
     return lowerDEBUGTRAP(Op, DAG);
   case ISD::ABS:
+    if (Op.getValueType() == MVT::i16)
+      return lowerABSi16(Op, DAG);
+    // fall through
   case ISD::FABS:
   case ISD::FNEG:
   case ISD::FCANONICALIZE:
@@ -8139,6 +8146,25 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
 }
 
+// Lower a uniform i16 ABS by sign-extending to i32 and using the 32-bit ABS.
+SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::ABS &&
+         "Tried to select abs with non-abs opcode.");
+  assert(Op.getValueType() == MVT::i16 &&
+         "Tried to select abs i16 lowering with non-i16 type.");
+
+  // A divergent node will not end up using SGPRs; return an empty SDValue.
+  if (Op->isDivergent())
+    return SDValue();
+
+  // (abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1))))
+  SDValue Src = Op.getOperand(0);
+  SDLoc DL(Src);
+  SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+  SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
+}
+
 SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
                                              SelectionDAG &DAG) const {
   if (Subtarget->hasApertureRegs()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 74e58f4272e10..25e94851c24df 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -184,6 +184,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerABSi16(SDValue Op, SelectionDAG &DAG) const;
 
   SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
deleted file mode 100644
index fd305b6ffc061..0000000000000
--- a/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-//===-- SISAbs16Fixup.cpp - Lower I1 Copies -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass matches the pattern for 16-bit ABS instructions after they have
-// been lowered to for execution on the Scalar Unit.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineSSAUpdater.h"
-#include "llvm/InitializePasses.h"
-
-#define DEBUG_TYPE "si-abs16-pattern"
-
-using namespace llvm;
-
-static Register pierceCopies(Register R, MachineRegisterInfo& MRI) {
-  MachineInstr *CopyMI = MRI.getVRegDef(R);
-  while (CopyMI && CopyMI->getOpcode() == AMDGPU::COPY) {
-    Register T = CopyMI->getOperand(1).getReg();
-    if (!T.isVirtual())
-      break;
-
-    R = T;
-    CopyMI = MRI.getVRegDef(R);
-  }
-
-  return R;
-}
-
-static MachineInstr *matchExpandAbsPattern(MachineInstr &MI,
-                                           MachineRegisterInfo &MRI) {
-  std::array<MachineInstr *, 2> SextInstructions;
-  for (unsigned I = 0; I < SextInstructions.size(); I++)
-  {
-    SextInstructions[I] = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
-    if (SextInstructions[I]->getOpcode() != AMDGPU::S_SEXT_I32_I16)
-      return nullptr;
-  }
-
-  Register AbsSource;
-  MachineInstr* SubIns = nullptr;
-  for (MachineInstr *SextMI : SextInstructions) {
-    Register SextReg = SextMI->getOperand(1).getReg();
-    MachineInstr* OperandMI = MRI.getVRegDef(SextReg);
-    if (OperandMI->getOpcode() == AMDGPU::S_SUB_I32)
-      if(!SubIns)
-        SubIns = OperandMI;
-      else
-        return nullptr;
-    else
-      AbsSource = pierceCopies(SextReg,MRI);
-  }
-
-  if (!SubIns)
-    return nullptr;
-
-  if (MRI.getRegClass(AbsSource) != &AMDGPU::SGPR_32RegClass)
-    return nullptr;
-
-  MachineInstr &MustBeZero =
-      *MRI.getVRegDef(pierceCopies(SubIns->getOperand(1).getReg(), MRI));
-  if (MustBeZero.getOpcode() != AMDGPU::S_MOV_B32 ||
-      MustBeZero.getOperand(1).getImm())
-    return nullptr;
-
-  if (pierceCopies(SubIns->getOperand(2).getReg(), MRI) != AbsSource)
-    return nullptr;
-
-  return MRI.getVRegDef(AbsSource);
-}
-
-static bool runSAbs16Fixup(MachineFunction &MF) {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
-
-  bool Changed = false;
-  
-  for (MachineBasicBlock &MBB : MF)
-    for (MachineInstr &MI : make_early_inc_range(MBB)) {
-      bool IsPositive = MI.getOpcode() == AMDGPU::S_MAX_I32;
-      bool IsNegative = MI.getOpcode() == AMDGPU::S_MIN_I32;
-      MachineInstr* AbsSourceMI;
-      if ((!IsPositive && !IsNegative) ||
-          !(AbsSourceMI = matchExpandAbsPattern(MI, MRI)))
-        continue;
-
-      Register SextDestReg =
-          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      Register AbsDestReg =
-          IsNegative ? MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass)
-                     : MI.getOperand(0).getReg();
-
-      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SEXT_I32_I16),
-              SextDestReg)
-          .addReg(AbsSourceMI->getOperand(0).getReg());
-      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_ABS_I32), AbsDestReg)
-          .addReg(SextDestReg);
-
-      if(IsNegative)
-        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_SUB_I32),
-                MI.getOperand(0).getReg())
-            .addImm(0)
-            .addReg(AbsDestReg);
-
-      MI.eraseFromParent();
-      Changed = true;
-    }
-
-  return Changed;
-}
-
-PreservedAnalyses SISAbs16FixupPass::run(MachineFunction &MF,
-                                         MachineFunctionAnalysisManager &MFAM) {
-  bool Changed = runSAbs16Fixup(MF);
-  if (!Changed)
-    return PreservedAnalyses::all();
-
-  // TODO: Probably preserves most.
-  PreservedAnalyses PA;
-  PA.preserveSet<CFGAnalyses>();
-  return PA;
-}
-
-class SISAbs16FixupLegacy : public MachineFunctionPass {
-public:
-  static char ID;
-
-  SISAbs16FixupLegacy() : MachineFunctionPass(ID) {
-    initializeSISAbs16FixupLegacyPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  StringRef getPassName() const override { return "SI SAbs16 Fixup"; }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-};
-
-bool SISAbs16FixupLegacy::runOnMachineFunction(MachineFunction &MF) {
-  return runSAbs16Fixup(MF);
-}
-
-INITIALIZE_PASS_BEGIN(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
-                      false, false)
-INITIALIZE_PASS_END(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup",
-                    false, false)
-
-char SISAbs16FixupLegacy::ID = 0;
-
-char &llvm::SISAbs16FixupLegacyID = SISAbs16FixupLegacy::ID;
-
-FunctionPass *llvm::createSISAbs16FixupLegacyPass() {
-  return new SISAbs16FixupLegacy();
-}

>From 8b8b7055fa4889d654523b03786c6df03eb5b762 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 3 Nov 2025 17:36:51 -0500
Subject: [PATCH 09/21] For real?

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 99baac991a128..091704ddeba22 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -42,6 +42,7 @@
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/ModRef.h"
 #include "llvm/Transforms/Utils/LowerAtomic.h"
@@ -6780,7 +6781,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ABS:
     if (Op.getValueType() == MVT::i16)
       return lowerABSi16(Op, DAG);
-    // fall through
+    LLVM_FALLTHROUGH;
   case ISD::FABS:
   case ISD::FNEG:
   case ISD::FCANONICALIZE:

>From f2bb4433d57aeec3cf734a6d39d1238a70c68c4a Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 4 Nov 2025 18:06:05 -0500
Subject: [PATCH 10/21] Add testcase

---
 llvm/test/CodeGen/AMDGPU/llvm.abs.ll | 692 +++++++++++++++++++++++++++
 1 file changed, 692 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.abs.ll

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
new file mode 100644
index 0000000000000..8b8448e260d73
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
@@ -0,0 +1,692 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=SDAG6
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=SDAG8
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=SDAG10
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -o - < %s | FileCheck %s --check-prefixes=SDAG1250
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250
+
+declare i16 @llvm.abs.i16(i16, i1)
+declare i32 @llvm.abs.i32(i32, i1)
+declare i64 @llvm.abs.i64(i64, i1)
+declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
+declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
+declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
+declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+
+define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+
+; GFX6-LABEL: abs_sgpr_i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i16 s0, s0
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i16 s0, s0
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i16 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  ret i16 %res
+}
+
+define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i16_neg:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i16_neg:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i16_neg:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i16_neg:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
+; GFX6-LABEL: abs_sgpr_i16_neg:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i16 s0, s0
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_sub_i32 s0, 0, s0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i16_neg:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_sub_i32 s0, 0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i16_neg:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i16 s0, s0
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_sub_i32 s0, 0, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i16_neg:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i16 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  %res2 = sub i16 0, %res1
+  ret i16 %res2
+}
+
+define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
+; GFX-LABEL: abs_sgpr_i32:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_abs_i32 s0, s0
+; GFX-NEXT:    ; return to shader part epilog
+  %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
+  ret i32 %res
+}
+
+define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
+; GFX6-LABEL: abs_sgpr_i64:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_ashr_i32 s2, s1, 31
+; GFX6-NEXT:    s_add_u32 s0, s0, s2
+; GFX6-NEXT:    s_mov_b32 s3, s2
+; GFX6-NEXT:    s_addc_u32 s1, s1, s2
+; GFX6-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_ashr_i32 s2, s1, 31
+; GFX8-NEXT:    s_add_u32 s0, s0, s2
+; GFX8-NEXT:    s_mov_b32 s3, s2
+; GFX8-NEXT:    s_addc_u32 s1, s1, s2
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_ashr_i32 s2, s1, 31
+; GFX10-NEXT:    s_add_u32 s0, s0, s2
+; GFX10-NEXT:    s_mov_b32 s3, s2
+; GFX10-NEXT:    s_addc_u32 s1, s1, s2
+; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i64:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_ashr_i32 s2, s1, 31
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_mov_b32 s3, s2
+; GFX1250-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
+  ret i64 %res
+}
+
+define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
+; GFX-LABEL: abs_sgpr_v4i32:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_abs_i32 s0, s0
+; GFX-NEXT:    s_abs_i32 s1, s1
+; GFX-NEXT:    s_abs_i32 s2, s2
+; GFX-NEXT:    s_abs_i32 s3, s3
+; GFX-NEXT:    ; return to shader part epilog
+  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
+  ret <4 x i32> %res
+}
+
+define i16 @abs_vgpr_i16(i16 %arg) {
+; GFX6-LABEL: abs_vgpr_i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v1
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_sub_u16_e32 v1, 0, v0
+; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_sub_nc_u16 v1, 0, v0
+; GFX10-NEXT:    v_max_i16 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_sub_nc_u16 v1, 0, v0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_max_i16 v0, v0, v1
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  ret i16 %res
+}
+
+define i32 @abs_vgpr_i32(i32 %arg) {
+; GFX6-LABEL: abs_vgpr_i32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v1
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_i32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
+; GFX8-NEXT:    v_max_i32_e32 v0, v0, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
+; GFX10-NEXT:    v_max_i32_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_max_i32_e32 v0, v0, v1
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
+  ret i32 %res
+}
+
+define i64 @abs_vgpr_i64(i64 %arg) {
+; GFX6-LABEL: abs_vgpr_i64:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX6-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_i64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX8-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_i64:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT:    v_mov_b32_e32 v3, v2
+; GFX1250-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX1250-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
+  ret i64 %res
+}
+
+define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
+; GFX6-LABEL: abs_vgpr_v4i32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v4
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v4
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
+; GFX6-NEXT:    v_max_i32_e32 v2, v2, v4
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; GFX6-NEXT:    v_max_i32_e32 v3, v3, v4
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_v4i32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v0
+; GFX8-NEXT:    v_max_i32_e32 v0, v0, v4
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v1
+; GFX8-NEXT:    v_max_i32_e32 v1, v1, v4
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v2
+; GFX8-NEXT:    v_max_i32_e32 v2, v2, v4
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v3
+; GFX8-NEXT:    v_max_i32_e32 v3, v3, v4
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_v4i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_sub_nc_u32_e32 v4, 0, v0
+; GFX10-NEXT:    v_sub_nc_u32_e32 v5, 0, v1
+; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 0, v2
+; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 0, v3
+; GFX10-NEXT:    v_max_i32_e32 v0, v0, v4
+; GFX10-NEXT:    v_max_i32_e32 v1, v1, v5
+; GFX10-NEXT:    v_max_i32_e32 v2, v2, v6
+; GFX10-NEXT:    v_max_i32_e32 v3, v3, v7
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_v4i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_sub_nc_u32 v4, 0, v0 :: v_dual_sub_nc_u32 v5, 0, v1
+; GFX1250-NEXT:    v_dual_sub_nc_u32 v6, 0, v2 :: v_dual_sub_nc_u32 v7, 0, v3
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-NEXT:    v_max_i32_e32 v0, v0, v4
+; GFX1250-NEXT:    v_max_i32_e32 v1, v1, v5
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250-NEXT:    v_max_i32_e32 v2, v2, v6
+; GFX1250-NEXT:    v_max_i32_e32 v3, v3, v7
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
+  ret <4 x i32> %res
+}
+
+define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
+; GFX-LABEL: abs_sgpr_v2i8:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_sext_i32_i8 s0, s0
+; GFX-NEXT:    s_sext_i32_i8 s1, s1
+; GFX-NEXT:    s_abs_i32 s0, s0
+; GFX-NEXT:    s_abs_i32 s1, s1
+; GFX-NEXT:    ; return to shader part epilog
+  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
+  ret <2 x i8> %res
+}
+
+define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
+; GFX6-LABEL: abs_vgpr_v2i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_v2i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v2, 0
+; GFX8-NEXT:    v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT:    v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT:    v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_v2i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX10-NEXT:    v_sub_nc_u16 v2, 0, v0
+; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
+; GFX10-NEXT:    v_max_i16 v0, v0, v2
+; GFX10-NEXT:    v_max_i16 v1, v1, v3
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_v2i8:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-NEXT:    v_sub_nc_u16 v2, 0, v0
+; GFX1250-NEXT:    v_sub_nc_u16 v3, 0, v1
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-NEXT:    v_max_i16 v0, v0, v2
+; GFX1250-NEXT:    v_max_i16 v1, v1, v3
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
+  ret <2 x i8> %res
+}
+
+define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
+; GFX-LABEL: abs_sgpr_v3i8:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_sext_i32_i8 s0, s0
+; GFX-NEXT:    s_sext_i32_i8 s1, s1
+; GFX-NEXT:    s_sext_i32_i8 s2, s2
+; GFX-NEXT:    s_abs_i32 s0, s0
+; GFX-NEXT:    s_abs_i32 s1, s1
+; GFX-NEXT:    s_abs_i32 s2, s2
+; GFX-NEXT:    ; return to shader part epilog
+  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
+  ret <3 x i8> %res
+}
+
+define <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
+; GFX6-LABEL: abs_vgpr_v3i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
+; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_v3i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v3, 0
+; GFX8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT:    v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT:    v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-NEXT:    v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT:    v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_v3i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX10-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v0
+; GFX10-NEXT:    v_sub_nc_u16 v4, 0, v1
+; GFX10-NEXT:    v_sub_nc_u16 v5, 0, v2
+; GFX10-NEXT:    v_max_i16 v0, v0, v3
+; GFX10-NEXT:    v_max_i16 v1, v1, v4
+; GFX10-NEXT:    v_max_i16 v2, v2, v5
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_v3i8:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX1250-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-NEXT:    v_sub_nc_u16 v3, 0, v0
+; GFX1250-NEXT:    v_sub_nc_u16 v4, 0, v1
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-NEXT:    v_sub_nc_u16 v5, 0, v2
+; GFX1250-NEXT:    v_max_i16 v0, v0, v3
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-NEXT:    v_max_i16 v1, v1, v4
+; GFX1250-NEXT:    v_max_i16 v2, v2, v5
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
+  ret <3 x i8> %res
+}
+
+define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
+; GFX6-LABEL: abs_sgpr_v2i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i16 s0, s0
+; GFX6-NEXT:    s_sext_i32_i16 s1, s1
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_abs_i32 s1, s1
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_v2i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_abs_i32 s1, s1
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX8-NEXT:    s_or_b32 s0, s0, s1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_v2i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i16 s1, s0
+; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
+; GFX10-NEXT:    s_abs_i32 s1, s1
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s1, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_v2i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i16 s1, s0
+; GFX1250-NEXT:    s_ashr_i32 s0, s0, 16
+; GFX1250-NEXT:    s_abs_i32 s1, s1
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_pack_ll_b32_b16 s0, s1, s0
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
+  ret <2 x i16> %res
+}
+
+define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
+; GFX6-LABEL: abs_vgpr_v2i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_v2i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v2, 0
+; GFX8-NEXT:    v_sub_u16_e32 v1, 0, v0
+; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_max_i16_e32 v1, v0, v1
+; GFX8-NEXT:    v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_v2i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_pk_sub_i16 v1, 0, v0
+; GFX10-NEXT:    v_pk_max_i16 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_v2i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_pk_sub_i16 v1, 0, v0
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_pk_max_i16 v0, v0, v1
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
+  ret <2 x i16> %res
+}
+
+define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
+; GFX6-LABEL: abs_sgpr_v3i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i16 s0, s0
+; GFX6-NEXT:    s_sext_i32_i16 s1, s1
+; GFX6-NEXT:    s_sext_i32_i16 s2, s2
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_abs_i32 s1, s1
+; GFX6-NEXT:    s_abs_i32 s2, s2
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_v3i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX8-NEXT:    s_sext_i32_i16 s2, s2
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_abs_i32 s2, s2
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
+; GFX8-NEXT:    s_abs_i32 s1, s1
+; GFX8-NEXT:    s_or_b32 s0, s0, s2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_v3i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i16 s2, s0
+; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
+; GFX10-NEXT:    s_abs_i32 s2, s2
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_sext_i32_i16 s1, s1
+; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
+; GFX10-NEXT:    s_abs_i32 s1, s1
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_v3i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i16 s2, s0
+; GFX1250-NEXT:    s_ashr_i32 s0, s0, 16
+; GFX1250-NEXT:    s_abs_i32 s2, s2
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    s_sext_i32_i16 s1, s1
+; GFX1250-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
+; GFX1250-NEXT:    s_abs_i32 s1, s1
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
+  ret <3 x i16> %res
+}
+
+define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
+; GFX6-LABEL: abs_vgpr_v3i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
+; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: abs_vgpr_v3i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v3, 0
+; GFX8-NEXT:    v_sub_u16_e32 v2, 0, v0
+; GFX8-NEXT:    v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_sub_u16_e32 v4, 0, v1
+; GFX8-NEXT:    v_max_i16_e32 v2, v0, v2
+; GFX8-NEXT:    v_max_i16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT:    v_max_i16_e32 v1, v1, v4
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: abs_vgpr_v3i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_pk_sub_i16 v2, 0, v0
+; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
+; GFX10-NEXT:    v_pk_max_i16 v0, v0, v2
+; GFX10-NEXT:    v_max_i16 v1, v1, v3
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: abs_vgpr_v3i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_pk_sub_i16 v2, 0, v0
+; GFX1250-NEXT:    v_sub_nc_u16 v3, 0, v1
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-NEXT:    v_pk_max_i16 v0, v0, v2
+; GFX1250-NEXT:    v_max_i16 v1, v1, v3
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
+  ret <3 x i16> %res
+}

>From 49892ede7e21fa459b0d3db48fb0c4a7d9957c67 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 4 Nov 2025 18:11:33 -0500
Subject: [PATCH 11/21] Delete new testcase

---
 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll | 26 --------------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/s_abs_i16.ll

diff --git a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
deleted file mode 100644
index 0cdbedd837396..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
-
-define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
-; CHECK-LABEL: abs_i16:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_sext_i32_i16 s0, s0
-; CHECK-NEXT:    s_abs_i32 s0, s0
-; CHECK-NEXT:    ; return to shader part epilog
-
-  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
-  ret i16 %res
-}
-
-define amdgpu_ps i16 @abs_i16_neg(i16 inreg %arg) {
-; CHECK-LABEL: abs_i16_neg:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_sext_i32_i16 s0, s0
-; CHECK-NEXT:    s_abs_i32 s0, s0
-; CHECK-NEXT:    s_sub_i32 s0, 0, s0
-; CHECK-NEXT:    ; return to shader part epilog
-  %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
-  %res2 = sub i16 0, %res1
-  ret i16 %res2
-}

>From 0c29f15ea1bd10b10fc0cd0bffddf4734b887bf6 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 4 Nov 2025 18:25:20 -0500
Subject: [PATCH 12/21] Fix testcase

---
 llvm/test/CodeGen/AMDGPU/llvm.abs.ll | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
index 8b8448e260d73..bac900cf0f3a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
@@ -72,11 +72,13 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
 }
 
 define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
+; COM: Suboptimal code generation on Tahiti.
 ; SDAG6-LABEL: abs_sgpr_i16_neg:
 ; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG6-NEXT:    s_sext_i32_i16 s1, s0
+; SDAG6-NEXT:    s_ashr_i32 s1, s1, 15
+; SDAG6-NEXT:    s_xor_b32 s0, s0, s1
+; SDAG6-NEXT:    s_sub_i32 s0, s1, s0
 ; SDAG6-NEXT:    ; return to shader part epilog
 ;
 ; SDAG8-LABEL: abs_sgpr_i16_neg:

>From 47e5e7c3a6153bd80543a8278cd24c52365aa67c Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 5 Nov 2025 15:56:14 -0500
Subject: [PATCH 13/21] Update testcase

---
 llvm/test/CodeGen/AMDGPU/absdiff.ll | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/absdiff.ll b/llvm/test/CodeGen/AMDGPU/absdiff.ll
index 9cb397fb9d1c6..ee8241e355e26 100644
--- a/llvm/test/CodeGen/AMDGPU/absdiff.ll
+++ b/llvm/test/CodeGen/AMDGPU/absdiff.ll
@@ -5,10 +5,8 @@ define amdgpu_ps i16 @absdiff_i16_false(i16 inreg %arg0, i16 inreg %arg1) {
 ; CHECK-LABEL: absdiff_i16_false:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_sub_i32 s0, s0, s1
-; CHECK-NEXT:    s_sext_i32_i16 s1, s0
-; CHECK-NEXT:    s_sub_i32 s0, 0, s0
 ; CHECK-NEXT:    s_sext_i32_i16 s0, s0
-; CHECK-NEXT:    s_max_i32 s0, s1, s0
+; CHECK-NEXT:    s_abs_i32 s0, s0
 ; CHECK-NEXT:    ; return to shader part epilog
   %diff = sub i16 %arg0, %arg1
   %res = call i16 @llvm.abs.i16(i16 %diff, i1 false) ; INT_MIN input returns INT_MIN
@@ -19,10 +17,8 @@ define amdgpu_ps i16 @absdiff_i16_true(i16 inreg %arg0, i16 inreg %arg1) {
 ; CHECK-LABEL: absdiff_i16_true:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_sub_i32 s0, s0, s1
-; CHECK-NEXT:    s_sext_i32_i16 s1, s0
-; CHECK-NEXT:    s_sub_i32 s0, 0, s0
 ; CHECK-NEXT:    s_sext_i32_i16 s0, s0
-; CHECK-NEXT:    s_max_i32 s0, s1, s0
+; CHECK-NEXT:    s_abs_i32 s0, s0
 ; CHECK-NEXT:    ; return to shader part epilog
   %diff = sub i16 %arg0, %arg1
   %res = call i16 @llvm.abs.i16(i16 %diff, i1 true) ; INT_MIN input returns poison

>From 5d6673f6481310badf134143983a2a5afe0d22ec Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 5 Nov 2025 17:11:24 -0500
Subject: [PATCH 14/21] i8

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 +--
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 22 ++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  3 +++
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 26 -------------------
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  1 -
 llvm/test/CodeGen/AMDGPU/llvm.abs.ll          | 11 ++++++++
 6 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4d5d1fc7dfadc..a3c19c6777790 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5575,8 +5575,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// \param N Node to expand
   /// \param IsNegative indicate negated abs
   /// \returns The expansion result or SDValue() if it fails.
-  SDValue expandABS(SDNode *N, SelectionDAG &DAG,
-                    bool IsNegative = false) const;
+  virtual SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+                            bool IsNegative = false) const;
 
   /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
   /// \param N Node to expand
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f5081a9d2dd56..f7dc3a48553d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5287,6 +5287,28 @@ SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
   return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
 }
 
+SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
+                                        bool IsNegative) const {
+  if (N->isDivergent() ||
+      (N->getValueType(0) != MVT::i8 && N->getValueType(0) != MVT::i16))
+    return TargetLowering::expandABS(N, DAG, IsNegative);
+
+  // Uniform i8/i16 only: (abs x) -> (trunc (abs i32 (sext i32 x))); when
+  // IsNegative is set the truncated result is then subtracted from zero.
+  SDValue Src = N->getOperand(0);
+  SDLoc DL(Src);
+  SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+  SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
+  SDValue TruncResult =
+      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), ExtAbs);
+
+  if (!IsNegative)
+    return TruncResult;
+
+  return DAG.getNode(ISD::SUB, DL, N->getValueType(0),
+                     DAG.getConstant(0, DL, N->getValueType(0)), TruncResult);
+}
+
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bdaf48652d107..57ce10b8b582f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -45,6 +45,9 @@ class AMDGPUTargetLowering : public TargetLowering {
   /// original size will not change the value.
   static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
 
+  virtual SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+                            bool IsNegative = false) const override;
+
 protected:
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 091704ddeba22..12138503f9708 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -178,10 +178,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
     addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
     addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
-
-    // We don't want the default expansion of 16-bit ABS since we can
-    // sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
-    setOperationAction(ISD::ABS, MVT::i16, Custom);
   }
 
   addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -6779,9 +6775,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::DEBUGTRAP:
     return lowerDEBUGTRAP(Op, DAG);
   case ISD::ABS:
-    if (Op.getValueType() == MVT::i16)
-      return lowerABSi16(Op, DAG);
-    LLVM_FALLTHROUGH;
   case ISD::FABS:
   case ISD::FNEG:
   case ISD::FCANONICALIZE:
@@ -8147,25 +8140,6 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
 }
 
-// sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
-SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const {
-  assert(Op.getOpcode() == ISD::ABS &&
-         "Tried to select abs with non-abs opcode.");
-  assert(Op.getValueType() == MVT::i16 &&
-         "Tried to select abs i16 lowering with non-i16 type.");
-
-  // divergent means will not end up using SGPRs
-  if (Op->isDivergent())
-    return SDValue();
-
-  //(abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1))))
-  SDValue Src = Op.getOperand(0);
-  SDLoc DL(Src);
-  SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
-  SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
-  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
-}
-
 SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
                                              SelectionDAG &DAG) const {
   if (Subtarget->hasApertureRegs()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 25e94851c24df..74e58f4272e10 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -184,7 +184,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerABSi16(SDValue Op, SelectionDAG &DAG) const;
 
   SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
index bac900cf0f3a9..0a1b79d7c6ece 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
@@ -17,6 +17,17 @@ declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
 declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
 declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
 
+define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
+  %res = call i8 @llvm.abs.i8(i8 %arg, i1 false)
+  ret i8 %res
+}
+
+define amdgpu_cs i8 @abs_sgpr_i8_neg(i8 inreg %arg) {
+  %res1 = call i8 @llvm.abs.i8(i8 %arg, i1 false)
+  %res2 = sub i8 0, %res1
+  ret i8 %res2
+}
+
 define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
 ; SDAG6-LABEL: abs_sgpr_i16:
 ; SDAG6:       ; %bb.0:

>From 88b8e2f0aaf6b9e4996c3667aed7845f55641986 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 5 Nov 2025 17:12:43 -0500
Subject: [PATCH 15/21] Update i8 testcase

---
 llvm/test/CodeGen/AMDGPU/llvm.abs.ll | 798 ++++++++++++++++++++++++++-
 1 file changed, 790 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
index 0a1b79d7c6ece..dd7d2fbc931b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
@@ -18,11 +18,120 @@ declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
 declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
 
 define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
+; GFX6-LABEL: abs_sgpr_i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i8 s0, s0
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_sext_i32_i8 s0, s0
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i8 s0, s0
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i8:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i8 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
   %res = call i8 @llvm.abs.i8(i8 %arg, i1 false)
   ret i8 %res
 }
 
 define amdgpu_cs i8 @abs_sgpr_i8_neg(i8 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i8_neg:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i8_neg:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i8_neg:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i8_neg:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
+; GFX6-LABEL: abs_sgpr_i8_neg:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i8 s0, s0
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_sub_i32 s0, 0, s0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i8_neg:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_sext_i32_i8 s0, s0
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_sub_i32 s0, 0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i8_neg:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i8 s0, s0
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_sub_i32 s0, 0, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i8_neg:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i8 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
   %res1 = call i8 @llvm.abs.i8(i8 %arg, i1 false)
   %res2 = sub i8 0, %res1
   ret i8 %res2
@@ -53,7 +162,7 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
 ; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; SDAG1250-NEXT:    s_abs_i32 s0, s0
 ; SDAG1250-NEXT:    ; return to shader part epilog
-
+;
 ; GFX6-LABEL: abs_sgpr_i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_sext_i32_i16 s0, s0
@@ -78,18 +187,17 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
 ; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1250-NEXT:    s_abs_i32 s0, s0
 ; GFX1250-NEXT:    ; return to shader part epilog
+
   %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
   ret i16 %res
 }
 
 define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
-; COM: Suboptimal code generation on Tahiti.
 ; SDAG6-LABEL: abs_sgpr_i16_neg:
 ; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i16 s1, s0
-; SDAG6-NEXT:    s_ashr_i32 s1, s1, 15
-; SDAG6-NEXT:    s_xor_b32 s0, s0, s1
-; SDAG6-NEXT:    s_sub_i32 s0, s1, s0
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
 ; SDAG6-NEXT:    ; return to shader part epilog
 ;
 ; SDAG8-LABEL: abs_sgpr_i16_neg:
@@ -109,7 +217,7 @@ define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
 ; SDAG1250-LABEL: abs_sgpr_i16_neg:
 ; SDAG1250:       ; %bb.0:
 ; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; SDAG1250-NEXT:    s_abs_i32 s0, s0
 ; SDAG1250-NEXT:    s_sub_co_i32 s0, 0, s0
 ; SDAG1250-NEXT:    ; return to shader part epilog
@@ -138,7 +246,7 @@ define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
 ; GFX1250-LABEL: abs_sgpr_i16_neg:
 ; GFX1250:       ; %bb.0:
 ; GFX1250-NEXT:    s_sext_i32_i16 s0, s0
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1250-NEXT:    s_abs_i32 s0, s0
 ; GFX1250-NEXT:    s_sub_co_i32 s0, 0, s0
 ; GFX1250-NEXT:    ; return to shader part epilog
@@ -148,6 +256,26 @@ define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
 }
 
 define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_i32:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_abs_i32 s0, s0
@@ -157,6 +285,43 @@ define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
 }
 
 define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i64:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_ashr_i32 s2, s1, 31
+; SDAG6-NEXT:    s_mov_b32 s3, s2
+; SDAG6-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; SDAG6-NEXT:    s_sub_u32 s0, s0, s2
+; SDAG6-NEXT:    s_subb_u32 s1, s1, s2
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i64:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_ashr_i32 s2, s1, 31
+; SDAG8-NEXT:    s_mov_b32 s3, s2
+; SDAG8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; SDAG8-NEXT:    s_sub_u32 s0, s0, s2
+; SDAG8-NEXT:    s_subb_u32 s1, s1, s2
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i64:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_ashr_i32 s2, s1, 31
+; SDAG10-NEXT:    s_mov_b32 s3, s2
+; SDAG10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; SDAG10-NEXT:    s_sub_u32 s0, s0, s2
+; SDAG10-NEXT:    s_subb_u32 s1, s1, s2
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i64:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sub_nc_u64 s[2:3], 0, s[0:1]
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i64 v[0:1], s[0:1], s[2:3]
+; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; SDAG1250-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX6-LABEL: abs_sgpr_i64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_ashr_i32 s2, s1, 31
@@ -198,6 +363,38 @@ define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
 }
 
 define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v4i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_abs_i32 s3, s3
+; SDAG6-NEXT:    s_abs_i32 s2, s2
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v4i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_abs_i32 s3, s3
+; SDAG8-NEXT:    s_abs_i32 s2, s2
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v4i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_abs_i32 s3, s3
+; SDAG10-NEXT:    s_abs_i32 s2, s2
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s1, s1
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v4i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_abs_i32 s3, s3
+; SDAG1250-NEXT:    s_abs_i32 s2, s2
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s1, s1
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_v4i32:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_abs_i32 s0, s0
@@ -210,6 +407,37 @@ define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
 }
 
 define i16 @abs_vgpr_i16(i16 %arg) {
+; SDAG6-LABEL: abs_vgpr_i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_sub_u16_e32 v1, 0, v0
+; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_sub_nc_u16 v1, 0, v0
+; SDAG10-NEXT:    v_max_i16 v0, v0, v1
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_sub_nc_u16 v1, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i16 v0, v0, v1
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -245,6 +473,36 @@ define i16 @abs_vgpr_i16(i16 %arg) {
 }
 
 define i32 @abs_vgpr_i32(i32 %arg) {
+; SDAG6-LABEL: abs_vgpr_i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
+; SDAG8-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
+; SDAG10-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -279,6 +537,45 @@ define i32 @abs_vgpr_i32(i32 %arg) {
 }
 
 define i64 @abs_vgpr_i64(i64 %arg) {
+; SDAG6-LABEL: abs_vgpr_i64:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; SDAG6-NEXT:    v_xor_b32_e32 v0, v0, v2
+; SDAG6-NEXT:    v_xor_b32_e32 v1, v1, v2
+; SDAG6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; SDAG6-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_i64:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; SDAG8-NEXT:    v_xor_b32_e32 v0, v0, v2
+; SDAG8-NEXT:    v_xor_b32_e32 v1, v1, v2
+; SDAG8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
+; SDAG8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_i64:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; SDAG10-NEXT:    v_xor_b32_e32 v0, v0, v2
+; SDAG10-NEXT:    v_xor_b32_e32 v1, v1, v2
+; SDAG10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2
+; SDAG10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_i64:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_sub_nc_u64_e32 v[2:3], 0, v[0:1]
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i64 v[0:1], v[0:1], v[2:3]
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_i64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -326,6 +623,59 @@ define i64 @abs_vgpr_i64(i64 %arg) {
 }
 
 define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
+; SDAG6-LABEL: abs_vgpr_v4i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v4, v1
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
+; SDAG6-NEXT:    v_max_i32_e32 v2, v4, v2
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; SDAG6-NEXT:    v_max_i32_e32 v3, v4, v3
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v4i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v0
+; SDAG8-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v1
+; SDAG8-NEXT:    v_max_i32_e32 v1, v4, v1
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v2
+; SDAG8-NEXT:    v_max_i32_e32 v2, v4, v2
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v3
+; SDAG8-NEXT:    v_max_i32_e32 v3, v4, v3
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v4i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v4, 0, v0
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v5, 0, v1
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v6, 0, v2
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v7, 0, v3
+; SDAG10-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG10-NEXT:    v_max_i32_e32 v1, v5, v1
+; SDAG10-NEXT:    v_max_i32_e32 v2, v6, v2
+; SDAG10-NEXT:    v_max_i32_e32 v3, v7, v3
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v4i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_dual_sub_nc_u32 v4, 0, v0 :: v_dual_sub_nc_u32 v5, 0, v1
+; SDAG1250-NEXT:    v_dual_sub_nc_u32 v6, 0, v2 :: v_dual_sub_nc_u32 v7, 0, v3
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; SDAG1250-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG1250-NEXT:    v_max_i32_e32 v1, v5, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; SDAG1250-NEXT:    v_max_i32_e32 v2, v6, v2
+; SDAG1250-NEXT:    v_max_i32_e32 v3, v7, v3
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v4i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -383,6 +733,53 @@ define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
 }
 
 define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v2i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_or_b32 s0, s0, s2
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v2i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG8-NEXT:    s_or_b32 s0, s0, s2
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v2i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s1, s1
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG10-NEXT:    s_or_b32 s0, s0, s2
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v2i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG1250-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s1, s1
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_or_b32 s0, s0, s2
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_v2i8:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_sext_i32_i8 s0, s0
@@ -395,6 +792,64 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
 }
 
 define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
+; SDAG6-LABEL: abs_vgpr_v2i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v2, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v2, v1
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v2, 8, v1
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v2i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG8-NEXT:    v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_lshlrev_b16_e32 v2, 8, v1
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG8-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v2i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG10-NEXT:    v_sub_nc_u16 v2, 0, v1
+; SDAG10-NEXT:    v_sub_nc_u16 v3, 0, v0
+; SDAG10-NEXT:    v_max_i16 v1, v1, v2
+; SDAG10-NEXT:    v_max_i16 v0, v0, v3
+; SDAG10-NEXT:    v_lshlrev_b16 v2, 8, v1
+; SDAG10-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; SDAG10-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v2i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_sub_nc_u16 v2, 0, v1
+; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_max_i16 v1, v1, v2
+; SDAG1250-NEXT:    v_max_i16 v0, v0, v3
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_lshlrev_b16 v2, 8, v1
+; SDAG1250-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; SDAG1250-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v2i8:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -445,6 +900,79 @@ define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
 }
 
 define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v3i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_sext_i32_i8 s2, s2
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_abs_i32 s2, s2
+; SDAG6-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG6-NEXT:    s_or_b32 s0, s0, s1
+; SDAG6-NEXT:    s_or_b32 s0, s0, s3
+; SDAG6-NEXT:    s_lshr_b32 s1, s0, 8
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v3i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_bfe_i32 s2, s2, 0x80000
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_sext_i32_i16 s2, s2
+; SDAG8-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG8-NEXT:    s_abs_i32 s2, s2
+; SDAG8-NEXT:    s_or_b32 s0, s0, s1
+; SDAG8-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG8-NEXT:    s_and_b32 s1, s0, 0xffff
+; SDAG8-NEXT:    s_or_b32 s1, s1, s3
+; SDAG8-NEXT:    s_lshr_b32 s1, s1, 8
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v3i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG10-NEXT:    s_bfe_i32 s2, s2, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s1, s1
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG10-NEXT:    s_sext_i32_i16 s2, s2
+; SDAG10-NEXT:    s_or_b32 s0, s0, s1
+; SDAG10-NEXT:    s_abs_i32 s2, s2
+; SDAG10-NEXT:    s_and_b32 s1, s0, 0xffff
+; SDAG10-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG10-NEXT:    s_or_b32 s1, s1, s3
+; SDAG10-NEXT:    s_lshr_b32 s1, s1, 8
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v3i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG1250-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG1250-NEXT:    s_bfe_i32 s2, s2, 0x80000
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s1, s1
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG1250-NEXT:    s_sext_i32_i16 s2, s2
+; SDAG1250-NEXT:    s_or_b32 s0, s0, s1
+; SDAG1250-NEXT:    s_abs_i32 s2, s2
+; SDAG1250-NEXT:    s_and_b32 s1, s0, 0xffff
+; SDAG1250-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_or_b32 s1, s1, s3
+; SDAG1250-NEXT:    s_lshr_b32 s1, s1, 8
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_v3i8:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_sext_i32_i8 s0, s0
@@ -459,6 +987,86 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
 }
 
 define <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
+; SDAG6-LABEL: abs_vgpr_v3i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v3, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v3, v1
+; SDAG6-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
+; SDAG6-NEXT:    v_max_i32_e32 v2, v1, v2
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG6-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v3i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v1, sext(v1), v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG8-NEXT:    v_sub_u16_sdwa v1, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v2, sext(v2), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SDAG8-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v3i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG10-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; SDAG10-NEXT:    v_sub_nc_u16 v3, 0, v1
+; SDAG10-NEXT:    v_sub_nc_u16 v4, 0, v0
+; SDAG10-NEXT:    v_sub_nc_u16 v5, 0, v2
+; SDAG10-NEXT:    v_max_i16 v1, v1, v3
+; SDAG10-NEXT:    v_max_i16 v0, v0, v4
+; SDAG10-NEXT:    v_max_i16 v2, v2, v5
+; SDAG10-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG10-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
+; SDAG10-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG10-NEXT:    v_or_b32_sdwa v1, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDAG10-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v3i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG1250-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v1
+; SDAG1250-NEXT:    v_sub_nc_u16 v4, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; SDAG1250-NEXT:    v_max_i16 v1, v1, v3
+; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v2
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; SDAG1250-NEXT:    v_max_i16 v0, v0, v4
+; SDAG1250-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i16 v2, v2, v3
+; SDAG1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v2 :: v_dual_bitop2_b32 v0, v0, v1 bitop3:0x54
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_and_b32_e32 v3, 0xffff, v0
+; SDAG1250-NEXT:    v_or_b32_e32 v1, v3, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v3i8:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -521,6 +1129,41 @@ define <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
 }
 
 define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v2i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_lshl_b32 s2, s1, 16
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_or_b32 s0, s0, s2
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v2i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_ashr_i32 s1, s0, 16
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_lshl_b32 s1, s1, 16
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_or_b32 s0, s0, s1
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v2i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG10-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG10-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v2i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX6-LABEL: abs_sgpr_v2i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_sext_i32_i16 s0, s0
@@ -563,6 +1206,46 @@ define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
 }
 
 define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
+; SDAG6-LABEL: abs_vgpr_v2i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v2, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v2, v1
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v2i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG8-NEXT:    v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDAG8-NEXT:    v_sub_u16_e32 v2, 0, v0
+; SDAG8-NEXT:    v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v2
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v2i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_pk_sub_i16 v1, 0, v0
+; SDAG10-NEXT:    v_pk_max_i16 v0, v0, v1
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v2i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_pk_sub_i16 v1, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, v0, v1
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v2i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -605,6 +1288,55 @@ define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
 }
 
 define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v3i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i16 s3, s2
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_lshl_b32 s2, s1, 16
+; SDAG6-NEXT:    s_abs_i32 s3, s3
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_lshr_b64 s[4:5], s[2:3], 16
+; SDAG6-NEXT:    s_or_b32 s0, s0, s2
+; SDAG6-NEXT:    s_mov_b32 s1, s4
+; SDAG6-NEXT:    s_mov_b32 s2, s3
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v3i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_ashr_i32 s2, s0, 16
+; SDAG8-NEXT:    s_abs_i32 s2, s2
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_lshl_b32 s2, s2, 16
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_or_b32 s0, s0, s2
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v3i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG10-NEXT:    v_pk_sub_i16 v1, 0, s1
+; SDAG10-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG10-NEXT:    v_pk_max_i16 v1, s1, v1
+; SDAG10-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG10-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v3i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG1250-NEXT:    v_pk_sub_i16 v1, 0, s1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG1250-NEXT:    v_pk_max_i16 v1, s1, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG1250-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX6-LABEL: abs_sgpr_v3i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_sext_i32_i16 s0, s0
@@ -654,6 +1386,56 @@ define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
 }
 
 define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
+; SDAG6-LABEL: abs_vgpr_v3i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; SDAG6-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; SDAG6-NEXT:    v_max_i32_e32 v0, v3, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v3, v1
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; SDAG6-NEXT:    v_max_i32_e32 v2, v3, v2
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG6-NEXT:    v_alignbit_b32 v1, v2, v1, 16
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v3i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG8-NEXT:    v_sub_u16_e32 v3, 0, v1
+; SDAG8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDAG8-NEXT:    v_max_i16_e32 v1, v1, v3
+; SDAG8-NEXT:    v_sub_u16_e32 v3, 0, v0
+; SDAG8-NEXT:    v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v3
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v3i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_pk_sub_i16 v2, 0, v0
+; SDAG10-NEXT:    v_pk_sub_i16 v3, 0, v1
+; SDAG10-NEXT:    v_pk_max_i16 v0, v0, v2
+; SDAG10-NEXT:    v_pk_max_i16 v1, v1, v3
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v3i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_pk_sub_i16 v2, 0, v0
+; SDAG1250-NEXT:    v_pk_sub_i16 v3, 0, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, v0, v2
+; SDAG1250-NEXT:    v_pk_max_i16 v1, v1, v3
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v3i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)

>From ae98312a97244d8b05213a696d10b8c594828740 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <patrick.simmons at amd.com>
Date: Thu, 6 Nov 2025 11:32:17 -0500
Subject: [PATCH 16/21] Review changes

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 3 +--
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f7dc3a48553d7..85b8333010990 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5305,8 +5305,7 @@ SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
   if (!IsNegative)
     return TruncResult;
 
-  return DAG.getNode(ISD::SUB, DL, N->getValueType(0),
-                     DAG.getConstant(0, DL, N->getValueType(0)), TruncResult);
+  return DAG.getNegative(TruncResult,DL,N->getValueType(0));
 }
 
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 12138503f9708..768c0abd2e3f1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -42,7 +42,6 @@
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/ModRef.h"
 #include "llvm/Transforms/Utils/LowerAtomic.h"

>From 0ea5be657e708742813987d06b68c4ad5b3f9a94 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <patrick.simmons at amd.com>
Date: Thu, 6 Nov 2025 13:10:56 -0500
Subject: [PATCH 17/21] Delete testcase

---
 llvm/test/CodeGen/AMDGPU/llvm.abs.ll | 1487 --------------------------
 1 file changed, 1487 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/llvm.abs.ll

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
deleted file mode 100644
index dd7d2fbc931b6..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
+++ /dev/null
@@ -1,1487 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=SDAG6
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=SDAG8
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=SDAG10
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -o - < %s | FileCheck %s --check-prefixes=SDAG1250
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250
-
-declare i16 @llvm.abs.i16(i16, i1)
-declare i32 @llvm.abs.i32(i32, i1)
-declare i64 @llvm.abs.i64(i64, i1)
-declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
-declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
-declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
-declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
-declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
-
-define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_i8:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_i8:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_i8:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_i8:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX6-LABEL: abs_sgpr_i8:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_sext_i32_i8 s0, s0
-; GFX6-NEXT:    s_abs_i32 s0, s0
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_i8:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_sext_i32_i8 s0, s0
-; GFX8-NEXT:    s_abs_i32 s0, s0
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_i8:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_sext_i32_i8 s0, s0
-; GFX10-NEXT:    s_abs_i32 s0, s0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX1250-LABEL: abs_sgpr_i8:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_sext_i32_i8 s0, s0
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT:    s_abs_i32 s0, s0
-; GFX1250-NEXT:    ; return to shader part epilog
-  %res = call i8 @llvm.abs.i8(i8 %arg, i1 false)
-  ret i8 %res
-}
-
-define amdgpu_cs i8 @abs_sgpr_i8_neg(i8 inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_i8_neg:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_i8_neg:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_sext_i32_i8 s0, s0
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    s_sub_i32 s0, 0, s0
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_i8_neg:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_sext_i32_i8 s0, s0
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    s_sub_i32 s0, 0, s0
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_i8_neg:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_sext_i32_i8 s0, s0
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    s_sub_co_i32 s0, 0, s0
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX6-LABEL: abs_sgpr_i8_neg:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_sext_i32_i8 s0, s0
-; GFX6-NEXT:    s_abs_i32 s0, s0
-; GFX6-NEXT:    s_sub_i32 s0, 0, s0
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_i8_neg:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_sext_i32_i8 s0, s0
-; GFX8-NEXT:    s_abs_i32 s0, s0
-; GFX8-NEXT:    s_sub_i32 s0, 0, s0
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_i8_neg:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_sext_i32_i8 s0, s0
-; GFX10-NEXT:    s_abs_i32 s0, s0
-; GFX10-NEXT:    s_sub_i32 s0, 0, s0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX1250-LABEL: abs_sgpr_i8_neg:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_sext_i32_i8 s0, s0
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT:    s_abs_i32 s0, s0
-; GFX1250-NEXT:    s_sub_co_i32 s0, 0, s0
-; GFX1250-NEXT:    ; return to shader part epilog
-  %res1 = call i8 @llvm.abs.i8(i8 %arg, i1 false)
-  %res2 = sub i8 0, %res1
-  ret i8 %res2
-}
-
-define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_i16:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_i16:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_i16:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_i16:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX6-LABEL: abs_sgpr_i16:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_sext_i32_i16 s0, s0
-; GFX6-NEXT:    s_abs_i32 s0, s0
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_sext_i32_i16 s0, s0
-; GFX8-NEXT:    s_abs_i32 s0, s0
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_sext_i32_i16 s0, s0
-; GFX10-NEXT:    s_abs_i32 s0, s0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX1250-LABEL: abs_sgpr_i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_sext_i32_i16 s0, s0
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT:    s_abs_i32 s0, s0
-; GFX1250-NEXT:    ; return to shader part epilog
-
-  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
-  ret i16 %res
-}
-
-define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_i16_neg:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_i16_neg:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    s_sub_i32 s0, 0, s0
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_i16_neg:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    s_sub_i32 s0, 0, s0
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_i16_neg:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    s_sub_co_i32 s0, 0, s0
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX6-LABEL: abs_sgpr_i16_neg:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_sext_i32_i16 s0, s0
-; GFX6-NEXT:    s_abs_i32 s0, s0
-; GFX6-NEXT:    s_sub_i32 s0, 0, s0
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_i16_neg:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_sext_i32_i16 s0, s0
-; GFX8-NEXT:    s_abs_i32 s0, s0
-; GFX8-NEXT:    s_sub_i32 s0, 0, s0
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_i16_neg:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_sext_i32_i16 s0, s0
-; GFX10-NEXT:    s_abs_i32 s0, s0
-; GFX10-NEXT:    s_sub_i32 s0, 0, s0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX1250-LABEL: abs_sgpr_i16_neg:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_sext_i32_i16 s0, s0
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT:    s_abs_i32 s0, s0
-; GFX1250-NEXT:    s_sub_co_i32 s0, 0, s0
-; GFX1250-NEXT:    ; return to shader part epilog
-  %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
-  %res2 = sub i16 0, %res1
-  ret i16 %res2
-}
-
-define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_i32:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_i32:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_i32:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_i32:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX-LABEL: abs_sgpr_i32:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_abs_i32 s0, s0
-; GFX-NEXT:    ; return to shader part epilog
-  %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
-  ret i32 %res
-}
-
-define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_i64:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_ashr_i32 s2, s1, 31
-; SDAG6-NEXT:    s_mov_b32 s3, s2
-; SDAG6-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; SDAG6-NEXT:    s_sub_u32 s0, s0, s2
-; SDAG6-NEXT:    s_subb_u32 s1, s1, s2
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_i64:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_ashr_i32 s2, s1, 31
-; SDAG8-NEXT:    s_mov_b32 s3, s2
-; SDAG8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; SDAG8-NEXT:    s_sub_u32 s0, s0, s2
-; SDAG8-NEXT:    s_subb_u32 s1, s1, s2
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_i64:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_ashr_i32 s2, s1, 31
-; SDAG10-NEXT:    s_mov_b32 s3, s2
-; SDAG10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; SDAG10-NEXT:    s_sub_u32 s0, s0, s2
-; SDAG10-NEXT:    s_subb_u32 s1, s1, s2
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_i64:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_sub_nc_u64 s[2:3], 0, s[0:1]
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG1250-NEXT:    v_max_i64 v[0:1], s[0:1], s[2:3]
-; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; SDAG1250-NEXT:    v_readfirstlane_b32 s1, v1
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX6-LABEL: abs_sgpr_i64:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_ashr_i32 s2, s1, 31
-; GFX6-NEXT:    s_add_u32 s0, s0, s2
-; GFX6-NEXT:    s_mov_b32 s3, s2
-; GFX6-NEXT:    s_addc_u32 s1, s1, s2
-; GFX6-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_i64:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_ashr_i32 s2, s1, 31
-; GFX8-NEXT:    s_add_u32 s0, s0, s2
-; GFX8-NEXT:    s_mov_b32 s3, s2
-; GFX8-NEXT:    s_addc_u32 s1, s1, s2
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_i64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_ashr_i32 s2, s1, 31
-; GFX10-NEXT:    s_add_u32 s0, s0, s2
-; GFX10-NEXT:    s_mov_b32 s3, s2
-; GFX10-NEXT:    s_addc_u32 s1, s1, s2
-; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX1250-LABEL: abs_sgpr_i64:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_ashr_i32 s2, s1, 31
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT:    s_mov_b32 s3, s2
-; GFX1250-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GFX1250-NEXT:    ; return to shader part epilog
-  %res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
-  ret i64 %res
-}
-
-define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_v4i32:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_abs_i32 s3, s3
-; SDAG6-NEXT:    s_abs_i32 s2, s2
-; SDAG6-NEXT:    s_abs_i32 s1, s1
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_v4i32:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_abs_i32 s3, s3
-; SDAG8-NEXT:    s_abs_i32 s2, s2
-; SDAG8-NEXT:    s_abs_i32 s1, s1
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_v4i32:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_abs_i32 s3, s3
-; SDAG10-NEXT:    s_abs_i32 s2, s2
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    s_abs_i32 s1, s1
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_v4i32:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_abs_i32 s3, s3
-; SDAG1250-NEXT:    s_abs_i32 s2, s2
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    s_abs_i32 s1, s1
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX-LABEL: abs_sgpr_v4i32:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_abs_i32 s0, s0
-; GFX-NEXT:    s_abs_i32 s1, s1
-; GFX-NEXT:    s_abs_i32 s2, s2
-; GFX-NEXT:    s_abs_i32 s3, s3
-; GFX-NEXT:    ; return to shader part epilog
-  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
-  ret <4 x i32> %res
-}
-
-define i16 @abs_vgpr_i16(i16 %arg) {
-; SDAG6-LABEL: abs_vgpr_i16:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
-; SDAG6-NEXT:    v_max_i32_e32 v0, v1, v0
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_i16:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_sub_u16_e32 v1, 0, v0
-; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v1
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_i16:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_sub_nc_u16 v1, 0, v0
-; SDAG10-NEXT:    v_max_i16 v0, v0, v1
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_i16:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_sub_nc_u16 v1, 0, v0
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG1250-NEXT:    v_max_i16 v0, v0, v1
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_i16:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
-; GFX6-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_sub_u16_e32 v1, 0, v0
-; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_sub_nc_u16 v1, 0, v0
-; GFX10-NEXT:    v_max_i16 v0, v0, v1
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_sub_nc_u16 v1, 0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_max_i16 v0, v0, v1
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
-  ret i16 %res
-}
-
-define i32 @abs_vgpr_i32(i32 %arg) {
-; SDAG6-LABEL: abs_vgpr_i32:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
-; SDAG6-NEXT:    v_max_i32_e32 v0, v1, v0
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_i32:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
-; SDAG8-NEXT:    v_max_i32_e32 v0, v1, v0
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_i32:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
-; SDAG10-NEXT:    v_max_i32_e32 v0, v1, v0
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_i32:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG1250-NEXT:    v_max_i32_e32 v0, v1, v0
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_i32:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
-; GFX6-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_i32:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
-; GFX8-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
-; GFX10-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_i32:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_max_i32_e32 v0, v0, v1
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
-  ret i32 %res
-}
-
-define i64 @abs_vgpr_i64(i64 %arg) {
-; SDAG6-LABEL: abs_vgpr_i64:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; SDAG6-NEXT:    v_xor_b32_e32 v0, v0, v2
-; SDAG6-NEXT:    v_xor_b32_e32 v1, v1, v2
-; SDAG6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; SDAG6-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_i64:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; SDAG8-NEXT:    v_xor_b32_e32 v0, v0, v2
-; SDAG8-NEXT:    v_xor_b32_e32 v1, v1, v2
-; SDAG8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
-; SDAG8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_i64:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; SDAG10-NEXT:    v_xor_b32_e32 v0, v0, v2
-; SDAG10-NEXT:    v_xor_b32_e32 v1, v1, v2
-; SDAG10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2
-; SDAG10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_i64:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_sub_nc_u64_e32 v[2:3], 0, v[0:1]
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG1250-NEXT:    v_max_i64 v[0:1], v[0:1], v[2:3]
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_i64:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
-; GFX6-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GFX6-NEXT:    v_xor_b32_e32 v1, v1, v2
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_i64:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
-; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GFX8-NEXT:    v_xor_b32_e32 v1, v1, v2
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_i64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
-; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v2
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_i64:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_mov_b32_e32 v3, v2
-; GFX1250-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GFX1250-NEXT:    v_xor_b32_e32 v1, v1, v2
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
-  ret i64 %res
-}
-
-define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
-; SDAG6-LABEL: abs_vgpr_v4i32:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
-; SDAG6-NEXT:    v_max_i32_e32 v0, v4, v0
-; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
-; SDAG6-NEXT:    v_max_i32_e32 v1, v4, v1
-; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
-; SDAG6-NEXT:    v_max_i32_e32 v2, v4, v2
-; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
-; SDAG6-NEXT:    v_max_i32_e32 v3, v4, v3
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_v4i32:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v0
-; SDAG8-NEXT:    v_max_i32_e32 v0, v4, v0
-; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v1
-; SDAG8-NEXT:    v_max_i32_e32 v1, v4, v1
-; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v2
-; SDAG8-NEXT:    v_max_i32_e32 v2, v4, v2
-; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v3
-; SDAG8-NEXT:    v_max_i32_e32 v3, v4, v3
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_v4i32:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_sub_nc_u32_e32 v4, 0, v0
-; SDAG10-NEXT:    v_sub_nc_u32_e32 v5, 0, v1
-; SDAG10-NEXT:    v_sub_nc_u32_e32 v6, 0, v2
-; SDAG10-NEXT:    v_sub_nc_u32_e32 v7, 0, v3
-; SDAG10-NEXT:    v_max_i32_e32 v0, v4, v0
-; SDAG10-NEXT:    v_max_i32_e32 v1, v5, v1
-; SDAG10-NEXT:    v_max_i32_e32 v2, v6, v2
-; SDAG10-NEXT:    v_max_i32_e32 v3, v7, v3
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_v4i32:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_dual_sub_nc_u32 v4, 0, v0 :: v_dual_sub_nc_u32 v5, 0, v1
-; SDAG1250-NEXT:    v_dual_sub_nc_u32 v6, 0, v2 :: v_dual_sub_nc_u32 v7, 0, v3
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; SDAG1250-NEXT:    v_max_i32_e32 v0, v4, v0
-; SDAG1250-NEXT:    v_max_i32_e32 v1, v5, v1
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG1250-NEXT:    v_max_i32_e32 v2, v6, v2
-; SDAG1250-NEXT:    v_max_i32_e32 v3, v7, v3
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_v4i32:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
-; GFX6-NEXT:    v_max_i32_e32 v0, v0, v4
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
-; GFX6-NEXT:    v_max_i32_e32 v1, v1, v4
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
-; GFX6-NEXT:    v_max_i32_e32 v2, v2, v4
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
-; GFX6-NEXT:    v_max_i32_e32 v3, v3, v4
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_v4i32:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v0
-; GFX8-NEXT:    v_max_i32_e32 v0, v0, v4
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v1
-; GFX8-NEXT:    v_max_i32_e32 v1, v1, v4
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v2
-; GFX8-NEXT:    v_max_i32_e32 v2, v2, v4
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v3
-; GFX8-NEXT:    v_max_i32_e32 v3, v3, v4
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_v4i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_sub_nc_u32_e32 v4, 0, v0
-; GFX10-NEXT:    v_sub_nc_u32_e32 v5, 0, v1
-; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 0, v2
-; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 0, v3
-; GFX10-NEXT:    v_max_i32_e32 v0, v0, v4
-; GFX10-NEXT:    v_max_i32_e32 v1, v1, v5
-; GFX10-NEXT:    v_max_i32_e32 v2, v2, v6
-; GFX10-NEXT:    v_max_i32_e32 v3, v3, v7
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_v4i32:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_sub_nc_u32 v4, 0, v0 :: v_dual_sub_nc_u32 v5, 0, v1
-; GFX1250-NEXT:    v_dual_sub_nc_u32 v6, 0, v2 :: v_dual_sub_nc_u32 v7, 0, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_max_i32_e32 v0, v0, v4
-; GFX1250-NEXT:    v_max_i32_e32 v1, v1, v5
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT:    v_max_i32_e32 v2, v2, v6
-; GFX1250-NEXT:    v_max_i32_e32 v3, v3, v7
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
-  ret <4 x i32> %res
-}
-
-define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_v2i8:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i8 s1, s1
-; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
-; SDAG6-NEXT:    s_abs_i32 s1, s1
-; SDAG6-NEXT:    s_lshl_b32 s2, s1, 8
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    s_or_b32 s0, s0, s2
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_v2i8:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_bfe_i32 s1, s1, 0x80000
-; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG8-NEXT:    s_abs_i32 s1, s1
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    s_lshl_b32 s2, s1, 8
-; SDAG8-NEXT:    s_or_b32 s0, s0, s2
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_v2i8:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_bfe_i32 s1, s1, 0x80000
-; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG10-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG10-NEXT:    s_abs_i32 s1, s1
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    s_lshl_b32 s2, s1, 8
-; SDAG10-NEXT:    s_or_b32 s0, s0, s2
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_v2i8:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_bfe_i32 s1, s1, 0x80000
-; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG1250-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG1250-NEXT:    s_abs_i32 s1, s1
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    s_lshl_b32 s2, s1, 8
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; SDAG1250-NEXT:    s_or_b32 s0, s0, s2
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX-LABEL: abs_sgpr_v2i8:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_sext_i32_i8 s0, s0
-; GFX-NEXT:    s_sext_i32_i8 s1, s1
-; GFX-NEXT:    s_abs_i32 s0, s0
-; GFX-NEXT:    s_abs_i32 s1, s1
-; GFX-NEXT:    ; return to shader part epilog
-  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
-  ret <2 x i8> %res
-}
-
-define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
-; SDAG6-LABEL: abs_vgpr_v2i8:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
-; SDAG6-NEXT:    v_max_i32_e32 v0, v2, v0
-; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
-; SDAG6-NEXT:    v_max_i32_e32 v1, v2, v1
-; SDAG6-NEXT:    v_lshlrev_b32_e32 v2, 8, v1
-; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v2
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_v2i8:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG8-NEXT:    v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; SDAG8-NEXT:    v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; SDAG8-NEXT:    v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; SDAG8-NEXT:    v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; SDAG8-NEXT:    v_lshlrev_b16_e32 v2, 8, v1
-; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v2
-; SDAG8-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_v2i8:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; SDAG10-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; SDAG10-NEXT:    v_sub_nc_u16 v2, 0, v1
-; SDAG10-NEXT:    v_sub_nc_u16 v3, 0, v0
-; SDAG10-NEXT:    v_max_i16 v1, v1, v2
-; SDAG10-NEXT:    v_max_i16 v0, v0, v3
-; SDAG10-NEXT:    v_lshlrev_b16 v2, 8, v1
-; SDAG10-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; SDAG10-NEXT:    v_or_b32_e32 v0, v0, v2
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_v2i8:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; SDAG1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG1250-NEXT:    v_sub_nc_u16 v2, 0, v1
-; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v0
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG1250-NEXT:    v_max_i16 v1, v1, v2
-; SDAG1250-NEXT:    v_max_i16 v0, v0, v3
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; SDAG1250-NEXT:    v_lshlrev_b16 v2, 8, v1
-; SDAG1250-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; SDAG1250-NEXT:    v_or_b32_e32 v0, v0, v2
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_v2i8:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
-; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
-; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_v2i8:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-NEXT:    v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX8-NEXT:    v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT:    v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_v2i8:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX10-NEXT:    v_sub_nc_u16 v2, 0, v0
-; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
-; GFX10-NEXT:    v_max_i16 v0, v0, v2
-; GFX10-NEXT:    v_max_i16 v1, v1, v3
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_v2i8:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_sub_nc_u16 v2, 0, v0
-; GFX1250-NEXT:    v_sub_nc_u16 v3, 0, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_max_i16 v0, v0, v2
-; GFX1250-NEXT:    v_max_i16 v1, v1, v3
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
-  ret <2 x i8> %res
-}
-
-define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_v3i8:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i8 s1, s1
-; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
-; SDAG6-NEXT:    s_sext_i32_i8 s2, s2
-; SDAG6-NEXT:    s_abs_i32 s1, s1
-; SDAG6-NEXT:    s_abs_i32 s2, s2
-; SDAG6-NEXT:    s_lshl_b32 s1, s1, 8
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    s_lshl_b32 s3, s2, 16
-; SDAG6-NEXT:    s_or_b32 s0, s0, s1
-; SDAG6-NEXT:    s_or_b32 s0, s0, s3
-; SDAG6-NEXT:    s_lshr_b32 s1, s0, 8
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_v3i8:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_bfe_i32 s1, s1, 0x80000
-; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG8-NEXT:    s_abs_i32 s1, s1
-; SDAG8-NEXT:    s_bfe_i32 s2, s2, 0x80000
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    s_sext_i32_i16 s2, s2
-; SDAG8-NEXT:    s_lshl_b32 s1, s1, 8
-; SDAG8-NEXT:    s_abs_i32 s2, s2
-; SDAG8-NEXT:    s_or_b32 s0, s0, s1
-; SDAG8-NEXT:    s_lshl_b32 s3, s2, 16
-; SDAG8-NEXT:    s_and_b32 s1, s0, 0xffff
-; SDAG8-NEXT:    s_or_b32 s1, s1, s3
-; SDAG8-NEXT:    s_lshr_b32 s1, s1, 8
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_v3i8:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_bfe_i32 s1, s1, 0x80000
-; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG10-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG10-NEXT:    s_bfe_i32 s2, s2, 0x80000
-; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG10-NEXT:    s_abs_i32 s1, s1
-; SDAG10-NEXT:    s_abs_i32 s0, s0
-; SDAG10-NEXT:    s_lshl_b32 s1, s1, 8
-; SDAG10-NEXT:    s_sext_i32_i16 s2, s2
-; SDAG10-NEXT:    s_or_b32 s0, s0, s1
-; SDAG10-NEXT:    s_abs_i32 s2, s2
-; SDAG10-NEXT:    s_and_b32 s1, s0, 0xffff
-; SDAG10-NEXT:    s_lshl_b32 s3, s2, 16
-; SDAG10-NEXT:    s_or_b32 s1, s1, s3
-; SDAG10-NEXT:    s_lshr_b32 s1, s1, 8
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_v3i8:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_bfe_i32 s1, s1, 0x80000
-; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
-; SDAG1250-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG1250-NEXT:    s_bfe_i32 s2, s2, 0x80000
-; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG1250-NEXT:    s_abs_i32 s1, s1
-; SDAG1250-NEXT:    s_abs_i32 s0, s0
-; SDAG1250-NEXT:    s_lshl_b32 s1, s1, 8
-; SDAG1250-NEXT:    s_sext_i32_i16 s2, s2
-; SDAG1250-NEXT:    s_or_b32 s0, s0, s1
-; SDAG1250-NEXT:    s_abs_i32 s2, s2
-; SDAG1250-NEXT:    s_and_b32 s1, s0, 0xffff
-; SDAG1250-NEXT:    s_lshl_b32 s3, s2, 16
-; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; SDAG1250-NEXT:    s_or_b32 s1, s1, s3
-; SDAG1250-NEXT:    s_lshr_b32 s1, s1, 8
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX-LABEL: abs_sgpr_v3i8:
-; GFX:       ; %bb.0:
-; GFX-NEXT:    s_sext_i32_i8 s0, s0
-; GFX-NEXT:    s_sext_i32_i8 s1, s1
-; GFX-NEXT:    s_sext_i32_i8 s2, s2
-; GFX-NEXT:    s_abs_i32 s0, s0
-; GFX-NEXT:    s_abs_i32 s1, s1
-; GFX-NEXT:    s_abs_i32 s2, s2
-; GFX-NEXT:    ; return to shader part epilog
-  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
-  ret <3 x i8> %res
-}
-
-define <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
-; SDAG6-LABEL: abs_vgpr_v3i8:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
-; SDAG6-NEXT:    v_max_i32_e32 v0, v3, v0
-; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
-; SDAG6-NEXT:    v_max_i32_e32 v1, v3, v1
-; SDAG6-NEXT:    v_bfe_i32 v2, v2, 0, 8
-; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
-; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
-; SDAG6-NEXT:    v_max_i32_e32 v2, v1, v2
-; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
-; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG6-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_v3i8:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; SDAG8-NEXT:    v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; SDAG8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; SDAG8-NEXT:    v_max_i16_sdwa v1, sext(v1), v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG8-NEXT:    v_sub_u16_sdwa v1, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; SDAG8-NEXT:    v_max_i16_sdwa v2, sext(v2), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; SDAG8-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
-; SDAG8-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; SDAG8-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_v3i8:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; SDAG10-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; SDAG10-NEXT:    v_bfe_i32 v2, v2, 0, 8
-; SDAG10-NEXT:    v_sub_nc_u16 v3, 0, v1
-; SDAG10-NEXT:    v_sub_nc_u16 v4, 0, v0
-; SDAG10-NEXT:    v_sub_nc_u16 v5, 0, v2
-; SDAG10-NEXT:    v_max_i16 v1, v1, v3
-; SDAG10-NEXT:    v_max_i16 v0, v0, v4
-; SDAG10-NEXT:    v_max_i16 v2, v2, v5
-; SDAG10-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG10-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
-; SDAG10-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG10-NEXT:    v_or_b32_sdwa v1, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; SDAG10-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_v3i8:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; SDAG1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; SDAG1250-NEXT:    v_bfe_i32 v2, v2, 0, 8
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v1
-; SDAG1250-NEXT:    v_sub_nc_u16 v4, 0, v0
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG1250-NEXT:    v_max_i16 v1, v1, v3
-; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v2
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; SDAG1250-NEXT:    v_max_i16 v0, v0, v4
-; SDAG1250-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG1250-NEXT:    v_max_i16 v2, v2, v3
-; SDAG1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v2 :: v_dual_bitop2_b32 v0, v0, v1 bitop3:0x54
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG1250-NEXT:    v_and_b32_e32 v3, 0xffff, v0
-; SDAG1250-NEXT:    v_or_b32_e32 v1, v3, v1
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG1250-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_v3i8:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
-; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
-; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 8
-; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
-; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_v3i8:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0
-; GFX8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX8-NEXT:    v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX8-NEXT:    v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX8-NEXT:    v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT:    v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_v3i8:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX10-NEXT:    v_bfe_i32 v2, v2, 0, 8
-; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v0
-; GFX10-NEXT:    v_sub_nc_u16 v4, 0, v1
-; GFX10-NEXT:    v_sub_nc_u16 v5, 0, v2
-; GFX10-NEXT:    v_max_i16 v0, v0, v3
-; GFX10-NEXT:    v_max_i16 v1, v1, v4
-; GFX10-NEXT:    v_max_i16 v2, v2, v5
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_v3i8:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX1250-NEXT:    v_bfe_i32 v2, v2, 0, 8
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_sub_nc_u16 v3, 0, v0
-; GFX1250-NEXT:    v_sub_nc_u16 v4, 0, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_sub_nc_u16 v5, 0, v2
-; GFX1250-NEXT:    v_max_i16 v0, v0, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_max_i16 v1, v1, v4
-; GFX1250-NEXT:    v_max_i16 v2, v2, v5
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
-  ret <3 x i8> %res
-}
-
-define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_v2i16:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG6-NEXT:    s_abs_i32 s1, s1
-; SDAG6-NEXT:    s_lshl_b32 s2, s1, 16
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    s_or_b32 s0, s0, s2
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_v2i16:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_ashr_i32 s1, s0, 16
-; SDAG8-NEXT:    s_abs_i32 s1, s1
-; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG8-NEXT:    s_lshl_b32 s1, s1, 16
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    s_or_b32 s0, s0, s1
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_v2i16:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    v_pk_sub_i16 v0, 0, s0
-; SDAG10-NEXT:    v_pk_max_i16 v0, s0, v0
-; SDAG10-NEXT:    v_readfirstlane_b32 s0, v0
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_v2i16:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    v_pk_sub_i16 v0, 0, s0
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG1250-NEXT:    v_pk_max_i16 v0, s0, v0
-; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX6-LABEL: abs_sgpr_v2i16:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_sext_i32_i16 s0, s0
-; GFX6-NEXT:    s_sext_i32_i16 s1, s1
-; GFX6-NEXT:    s_abs_i32 s0, s0
-; GFX6-NEXT:    s_abs_i32 s1, s1
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_v2i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sext_i32_i16 s0, s0
-; GFX8-NEXT:    s_abs_i32 s1, s1
-; GFX8-NEXT:    s_abs_i32 s0, s0
-; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX8-NEXT:    s_or_b32 s0, s0, s1
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_v2i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_sext_i32_i16 s1, s0
-; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
-; GFX10-NEXT:    s_abs_i32 s1, s1
-; GFX10-NEXT:    s_abs_i32 s0, s0
-; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s1, s0
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX1250-LABEL: abs_sgpr_v2i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_sext_i32_i16 s1, s0
-; GFX1250-NEXT:    s_ashr_i32 s0, s0, 16
-; GFX1250-NEXT:    s_abs_i32 s1, s1
-; GFX1250-NEXT:    s_abs_i32 s0, s0
-; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT:    s_pack_ll_b32_b16 s0, s1, s0
-; GFX1250-NEXT:    ; return to shader part epilog
-  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
-  ret <2 x i16> %res
-}
-
-define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
-; SDAG6-LABEL: abs_vgpr_v2i16:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 16
-; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
-; SDAG6-NEXT:    v_max_i32_e32 v0, v2, v0
-; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
-; SDAG6-NEXT:    v_max_i32_e32 v1, v2, v1
-; SDAG6-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v2
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_v2i16:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_mov_b32_e32 v1, 0
-; SDAG8-NEXT:    v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; SDAG8-NEXT:    v_sub_u16_e32 v2, 0, v0
-; SDAG8-NEXT:    v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v2
-; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_v2i16:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_pk_sub_i16 v1, 0, v0
-; SDAG10-NEXT:    v_pk_max_i16 v0, v0, v1
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_v2i16:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_pk_sub_i16 v1, 0, v0
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG1250-NEXT:    v_pk_max_i16 v0, v0, v1
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_v2i16:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
-; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
-; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
-; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_v2i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0
-; GFX8-NEXT:    v_sub_u16_e32 v1, 0, v0
-; GFX8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_max_i16_e32 v1, v0, v1
-; GFX8-NEXT:    v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_v2i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_sub_i16 v1, 0, v0
-; GFX10-NEXT:    v_pk_max_i16 v0, v0, v1
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_v2i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_pk_sub_i16 v1, 0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_pk_max_i16 v0, v0, v1
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
-  ret <2 x i16> %res
-}
-
-define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
-; SDAG6-LABEL: abs_sgpr_v3i16:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG6-NEXT:    s_sext_i32_i16 s3, s2
-; SDAG6-NEXT:    s_abs_i32 s1, s1
-; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG6-NEXT:    s_lshl_b32 s2, s1, 16
-; SDAG6-NEXT:    s_abs_i32 s3, s3
-; SDAG6-NEXT:    s_abs_i32 s0, s0
-; SDAG6-NEXT:    s_lshr_b64 s[4:5], s[2:3], 16
-; SDAG6-NEXT:    s_or_b32 s0, s0, s2
-; SDAG6-NEXT:    s_mov_b32 s1, s4
-; SDAG6-NEXT:    s_mov_b32 s2, s3
-; SDAG6-NEXT:    ; return to shader part epilog
-;
-; SDAG8-LABEL: abs_sgpr_v3i16:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_ashr_i32 s2, s0, 16
-; SDAG8-NEXT:    s_abs_i32 s2, s2
-; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
-; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
-; SDAG8-NEXT:    s_abs_i32 s0, s0
-; SDAG8-NEXT:    s_lshl_b32 s2, s2, 16
-; SDAG8-NEXT:    s_abs_i32 s1, s1
-; SDAG8-NEXT:    s_or_b32 s0, s0, s2
-; SDAG8-NEXT:    ; return to shader part epilog
-;
-; SDAG10-LABEL: abs_sgpr_v3i16:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    v_pk_sub_i16 v0, 0, s0
-; SDAG10-NEXT:    v_pk_sub_i16 v1, 0, s1
-; SDAG10-NEXT:    v_pk_max_i16 v0, s0, v0
-; SDAG10-NEXT:    v_pk_max_i16 v1, s1, v1
-; SDAG10-NEXT:    v_readfirstlane_b32 s0, v0
-; SDAG10-NEXT:    v_readfirstlane_b32 s1, v1
-; SDAG10-NEXT:    ; return to shader part epilog
-;
-; SDAG1250-LABEL: abs_sgpr_v3i16:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    v_pk_sub_i16 v0, 0, s0
-; SDAG1250-NEXT:    v_pk_sub_i16 v1, 0, s1
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG1250-NEXT:    v_pk_max_i16 v0, s0, v0
-; SDAG1250-NEXT:    v_pk_max_i16 v1, s1, v1
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
-; SDAG1250-NEXT:    v_readfirstlane_b32 s1, v1
-; SDAG1250-NEXT:    ; return to shader part epilog
-;
-; GFX6-LABEL: abs_sgpr_v3i16:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_sext_i32_i16 s0, s0
-; GFX6-NEXT:    s_sext_i32_i16 s1, s1
-; GFX6-NEXT:    s_sext_i32_i16 s2, s2
-; GFX6-NEXT:    s_abs_i32 s0, s0
-; GFX6-NEXT:    s_abs_i32 s1, s1
-; GFX6-NEXT:    s_abs_i32 s2, s2
-; GFX6-NEXT:    ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_v3i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sext_i32_i16 s0, s0
-; GFX8-NEXT:    s_abs_i32 s2, s2
-; GFX8-NEXT:    s_abs_i32 s0, s0
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
-; GFX8-NEXT:    s_abs_i32 s1, s1
-; GFX8-NEXT:    s_or_b32 s0, s0, s2
-; GFX8-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_v3i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_sext_i32_i16 s2, s0
-; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
-; GFX10-NEXT:    s_abs_i32 s2, s2
-; GFX10-NEXT:    s_abs_i32 s0, s0
-; GFX10-NEXT:    s_sext_i32_i16 s1, s1
-; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
-; GFX10-NEXT:    s_abs_i32 s1, s1
-; GFX10-NEXT:    ; return to shader part epilog
-;
-; GFX1250-LABEL: abs_sgpr_v3i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_sext_i32_i16 s2, s0
-; GFX1250-NEXT:    s_ashr_i32 s0, s0, 16
-; GFX1250-NEXT:    s_abs_i32 s2, s2
-; GFX1250-NEXT:    s_abs_i32 s0, s0
-; GFX1250-NEXT:    s_sext_i32_i16 s1, s1
-; GFX1250-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
-; GFX1250-NEXT:    s_abs_i32 s1, s1
-; GFX1250-NEXT:    ; return to shader part epilog
-  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
-  ret <3 x i16> %res
-}
-
-define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
-; SDAG6-LABEL: abs_vgpr_v3i16:
-; SDAG6:       ; %bb.0:
-; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 16
-; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
-; SDAG6-NEXT:    v_bfe_i32 v2, v2, 0, 16
-; SDAG6-NEXT:    v_max_i32_e32 v0, v3, v0
-; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
-; SDAG6-NEXT:    v_max_i32_e32 v1, v3, v1
-; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
-; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; SDAG6-NEXT:    v_max_i32_e32 v2, v3, v2
-; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG6-NEXT:    v_alignbit_b32 v1, v2, v1, 16
-; SDAG6-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG8-LABEL: abs_vgpr_v3i16:
-; SDAG8:       ; %bb.0:
-; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG8-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG8-NEXT:    v_sub_u16_e32 v3, 0, v1
-; SDAG8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; SDAG8-NEXT:    v_max_i16_e32 v1, v1, v3
-; SDAG8-NEXT:    v_sub_u16_e32 v3, 0, v0
-; SDAG8-NEXT:    v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v3
-; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v2
-; SDAG8-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG10-LABEL: abs_vgpr_v3i16:
-; SDAG10:       ; %bb.0:
-; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG10-NEXT:    v_pk_sub_i16 v2, 0, v0
-; SDAG10-NEXT:    v_pk_sub_i16 v3, 0, v1
-; SDAG10-NEXT:    v_pk_max_i16 v0, v0, v2
-; SDAG10-NEXT:    v_pk_max_i16 v1, v1, v3
-; SDAG10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG1250-LABEL: abs_vgpr_v3i16:
-; SDAG1250:       ; %bb.0:
-; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG1250-NEXT:    s_wait_kmcnt 0x0
-; SDAG1250-NEXT:    v_pk_sub_i16 v2, 0, v0
-; SDAG1250-NEXT:    v_pk_sub_i16 v3, 0, v1
-; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG1250-NEXT:    v_pk_max_i16 v0, v0, v2
-; SDAG1250-NEXT:    v_pk_max_i16 v1, v1, v3
-; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
-;
-; GFX6-LABEL: abs_vgpr_v3i16:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
-; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
-; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
-; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
-; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
-; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: abs_vgpr_v3i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0
-; GFX8-NEXT:    v_sub_u16_e32 v2, 0, v0
-; GFX8-NEXT:    v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_sub_u16_e32 v4, 0, v1
-; GFX8-NEXT:    v_max_i16_e32 v2, v0, v2
-; GFX8-NEXT:    v_max_i16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
-; GFX8-NEXT:    v_max_i16_e32 v1, v1, v4
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: abs_vgpr_v3i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_pk_sub_i16 v2, 0, v0
-; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
-; GFX10-NEXT:    v_pk_max_i16 v0, v0, v2
-; GFX10-NEXT:    v_max_i16 v1, v1, v3
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1250-LABEL: abs_vgpr_v3i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_pk_sub_i16 v2, 0, v0
-; GFX1250-NEXT:    v_sub_nc_u16 v3, 0, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_pk_max_i16 v0, v0, v2
-; GFX1250-NEXT:    v_max_i16 v1, v1, v3
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
-  ret <3 x i16> %res
-}

>From 1f11d6b07031f18a7b36bde35d2ff83a40de1c18 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <patrick.simmons at amd.com>
Date: Thu, 6 Nov 2025 13:11:24 -0500
Subject: [PATCH 18/21] Move file

---
 llvm/test/CodeGen/AMDGPU/{GlobalISel => }/llvm.abs.ll | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/CodeGen/AMDGPU/{GlobalISel => }/llvm.abs.ll (100%)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
rename to llvm/test/CodeGen/AMDGPU/llvm.abs.ll

>From d6c95213a5c18536fb05478011589a296f499e7e Mon Sep 17 00:00:00 2001
From: Patrick Simmons <patrick.simmons at amd.com>
Date: Thu, 6 Nov 2025 13:12:07 -0500
Subject: [PATCH 19/21] Restore testcase

---
 llvm/test/CodeGen/AMDGPU/llvm.abs.ll | 887 +++++++++++++++++++++++++++
 1 file changed, 887 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
index 6facdfdec64ae..dd7d2fbc931b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.abs.ll
@@ -1,4 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=SDAG6
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=SDAG8
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=SDAG10
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -o - < %s | FileCheck %s --check-prefixes=SDAG1250
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
@@ -13,7 +17,152 @@ declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
 declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
 declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
 
+define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
+; GFX6-LABEL: abs_sgpr_i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i8 s0, s0
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_sext_i32_i8 s0, s0
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i8 s0, s0
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i8:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i8 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res = call i8 @llvm.abs.i8(i8 %arg, i1 false)
+  ret i8 %res
+}
+
+define amdgpu_cs i8 @abs_sgpr_i8_neg(i8 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i8_neg:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i8_neg:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i8_neg:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i8_neg:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
+; GFX6-LABEL: abs_sgpr_i8_neg:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i8 s0, s0
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_sub_i32 s0, 0, s0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i8_neg:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_sext_i32_i8 s0, s0
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_sub_i32 s0, 0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i8_neg:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i8 s0, s0
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_sub_i32 s0, 0, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i8_neg:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i8 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res1 = call i8 @llvm.abs.i8(i8 %arg, i1 false)
+  %res2 = sub i8 0, %res1
+  ret i8 %res2
+}
+
 define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX6-LABEL: abs_sgpr_i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_sext_i32_i16 s0, s0
@@ -38,11 +187,95 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
 ; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1250-NEXT:    s_abs_i32 s0, s0
 ; GFX1250-NEXT:    ; return to shader part epilog
+
   %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
   ret i16 %res
 }
 
+define amdgpu_ps i16 @abs_sgpr_i16_neg(i16 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i16_neg:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i16_neg:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i16_neg:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_sub_i32 s0, 0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i16_neg:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
+; GFX6-LABEL: abs_sgpr_i16_neg:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i16 s0, s0
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_sub_i32 s0, 0, s0
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_i16_neg:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_sub_i32 s0, 0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_i16_neg:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i16 s0, s0
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_sub_i32 s0, 0, s0
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: abs_sgpr_i16_neg:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_sext_i32_i16 s0, s0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT:    s_abs_i32 s0, s0
+; GFX1250-NEXT:    s_sub_co_i32 s0, 0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
+  %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  %res2 = sub i16 0, %res1
+  ret i16 %res2
+}
+
 define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_i32:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_abs_i32 s0, s0
@@ -52,6 +285,43 @@ define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
 }
 
 define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_i64:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_ashr_i32 s2, s1, 31
+; SDAG6-NEXT:    s_mov_b32 s3, s2
+; SDAG6-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; SDAG6-NEXT:    s_sub_u32 s0, s0, s2
+; SDAG6-NEXT:    s_subb_u32 s1, s1, s2
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_i64:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_ashr_i32 s2, s1, 31
+; SDAG8-NEXT:    s_mov_b32 s3, s2
+; SDAG8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; SDAG8-NEXT:    s_sub_u32 s0, s0, s2
+; SDAG8-NEXT:    s_subb_u32 s1, s1, s2
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_i64:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_ashr_i32 s2, s1, 31
+; SDAG10-NEXT:    s_mov_b32 s3, s2
+; SDAG10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; SDAG10-NEXT:    s_sub_u32 s0, s0, s2
+; SDAG10-NEXT:    s_subb_u32 s1, s1, s2
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_i64:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_sub_nc_u64 s[2:3], 0, s[0:1]
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i64 v[0:1], s[0:1], s[2:3]
+; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; SDAG1250-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX6-LABEL: abs_sgpr_i64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_ashr_i32 s2, s1, 31
@@ -93,6 +363,38 @@ define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
 }
 
 define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v4i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_abs_i32 s3, s3
+; SDAG6-NEXT:    s_abs_i32 s2, s2
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v4i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_abs_i32 s3, s3
+; SDAG8-NEXT:    s_abs_i32 s2, s2
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v4i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_abs_i32 s3, s3
+; SDAG10-NEXT:    s_abs_i32 s2, s2
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s1, s1
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v4i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_abs_i32 s3, s3
+; SDAG1250-NEXT:    s_abs_i32 s2, s2
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s1, s1
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_v4i32:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_abs_i32 s0, s0
@@ -105,6 +407,37 @@ define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
 }
 
 define i16 @abs_vgpr_i16(i16 %arg) {
+; SDAG6-LABEL: abs_vgpr_i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_sub_u16_e32 v1, 0, v0
+; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_sub_nc_u16 v1, 0, v0
+; SDAG10-NEXT:    v_max_i16 v0, v0, v1
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_sub_nc_u16 v1, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i16 v0, v0, v1
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -140,6 +473,36 @@ define i16 @abs_vgpr_i16(i16 %arg) {
 }
 
 define i32 @abs_vgpr_i32(i32 %arg) {
+; SDAG6-LABEL: abs_vgpr_i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_sub_u32_e32 v1, vcc, 0, v0
+; SDAG8-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
+; SDAG10-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i32_e32 v0, v1, v0
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -174,6 +537,45 @@ define i32 @abs_vgpr_i32(i32 %arg) {
 }
 
 define i64 @abs_vgpr_i64(i64 %arg) {
+; SDAG6-LABEL: abs_vgpr_i64:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; SDAG6-NEXT:    v_xor_b32_e32 v0, v0, v2
+; SDAG6-NEXT:    v_xor_b32_e32 v1, v1, v2
+; SDAG6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; SDAG6-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_i64:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; SDAG8-NEXT:    v_xor_b32_e32 v0, v0, v2
+; SDAG8-NEXT:    v_xor_b32_e32 v1, v1, v2
+; SDAG8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
+; SDAG8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_i64:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; SDAG10-NEXT:    v_xor_b32_e32 v0, v0, v2
+; SDAG10-NEXT:    v_xor_b32_e32 v1, v1, v2
+; SDAG10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2
+; SDAG10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_i64:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_sub_nc_u64_e32 v[2:3], 0, v[0:1]
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i64 v[0:1], v[0:1], v[2:3]
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_i64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -221,6 +623,59 @@ define i64 @abs_vgpr_i64(i64 %arg) {
 }
 
 define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
+; SDAG6-LABEL: abs_vgpr_v4i32:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v4, v1
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v2
+; SDAG6-NEXT:    v_max_i32_e32 v2, v4, v2
+; SDAG6-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; SDAG6-NEXT:    v_max_i32_e32 v3, v4, v3
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v4i32:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v0
+; SDAG8-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v1
+; SDAG8-NEXT:    v_max_i32_e32 v1, v4, v1
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v2
+; SDAG8-NEXT:    v_max_i32_e32 v2, v4, v2
+; SDAG8-NEXT:    v_sub_u32_e32 v4, vcc, 0, v3
+; SDAG8-NEXT:    v_max_i32_e32 v3, v4, v3
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v4i32:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v4, 0, v0
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v5, 0, v1
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v6, 0, v2
+; SDAG10-NEXT:    v_sub_nc_u32_e32 v7, 0, v3
+; SDAG10-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG10-NEXT:    v_max_i32_e32 v1, v5, v1
+; SDAG10-NEXT:    v_max_i32_e32 v2, v6, v2
+; SDAG10-NEXT:    v_max_i32_e32 v3, v7, v3
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v4i32:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_dual_sub_nc_u32 v4, 0, v0 :: v_dual_sub_nc_u32 v5, 0, v1
+; SDAG1250-NEXT:    v_dual_sub_nc_u32 v6, 0, v2 :: v_dual_sub_nc_u32 v7, 0, v3
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; SDAG1250-NEXT:    v_max_i32_e32 v0, v4, v0
+; SDAG1250-NEXT:    v_max_i32_e32 v1, v5, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; SDAG1250-NEXT:    v_max_i32_e32 v2, v6, v2
+; SDAG1250-NEXT:    v_max_i32_e32 v3, v7, v3
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v4i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,6 +733,53 @@ define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
 }
 
 define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v2i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_or_b32 s0, s0, s2
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v2i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG8-NEXT:    s_or_b32 s0, s0, s2
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v2i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s1, s1
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG10-NEXT:    s_or_b32 s0, s0, s2
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v2i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG1250-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s1, s1
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_lshl_b32 s2, s1, 8
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_or_b32 s0, s0, s2
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_v2i8:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_sext_i32_i8 s0, s0
@@ -290,6 +792,64 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
 }
 
 define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
+; SDAG6-LABEL: abs_vgpr_v2i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v2, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v2, v1
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v2, 8, v1
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v2i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG8-NEXT:    v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_lshlrev_b16_e32 v2, 8, v1
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG8-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v2i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG10-NEXT:    v_sub_nc_u16 v2, 0, v1
+; SDAG10-NEXT:    v_sub_nc_u16 v3, 0, v0
+; SDAG10-NEXT:    v_max_i16 v1, v1, v2
+; SDAG10-NEXT:    v_max_i16 v0, v0, v3
+; SDAG10-NEXT:    v_lshlrev_b16 v2, 8, v1
+; SDAG10-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; SDAG10-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v2i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_sub_nc_u16 v2, 0, v1
+; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_max_i16 v1, v1, v2
+; SDAG1250-NEXT:    v_max_i16 v0, v0, v3
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_lshlrev_b16 v2, 8, v1
+; SDAG1250-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; SDAG1250-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v2i8:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -340,6 +900,79 @@ define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
 }
 
 define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v3i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i8 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i8 s0, s0
+; SDAG6-NEXT:    s_sext_i32_i8 s2, s2
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_abs_i32 s2, s2
+; SDAG6-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG6-NEXT:    s_or_b32 s0, s0, s1
+; SDAG6-NEXT:    s_or_b32 s0, s0, s3
+; SDAG6-NEXT:    s_lshr_b32 s1, s0, 8
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v3i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG8-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_bfe_i32 s2, s2, 0x80000
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_sext_i32_i16 s2, s2
+; SDAG8-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG8-NEXT:    s_abs_i32 s2, s2
+; SDAG8-NEXT:    s_or_b32 s0, s0, s1
+; SDAG8-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG8-NEXT:    s_and_b32 s1, s0, 0xffff
+; SDAG8-NEXT:    s_or_b32 s1, s1, s3
+; SDAG8-NEXT:    s_lshr_b32 s1, s1, 8
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v3i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG10-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG10-NEXT:    s_bfe_i32 s2, s2, 0x80000
+; SDAG10-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG10-NEXT:    s_abs_i32 s1, s1
+; SDAG10-NEXT:    s_abs_i32 s0, s0
+; SDAG10-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG10-NEXT:    s_sext_i32_i16 s2, s2
+; SDAG10-NEXT:    s_or_b32 s0, s0, s1
+; SDAG10-NEXT:    s_abs_i32 s2, s2
+; SDAG10-NEXT:    s_and_b32 s1, s0, 0xffff
+; SDAG10-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG10-NEXT:    s_or_b32 s1, s1, s3
+; SDAG10-NEXT:    s_lshr_b32 s1, s1, 8
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v3i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_bfe_i32 s1, s1, 0x80000
+; SDAG1250-NEXT:    s_bfe_i32 s0, s0, 0x80000
+; SDAG1250-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG1250-NEXT:    s_bfe_i32 s2, s2, 0x80000
+; SDAG1250-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG1250-NEXT:    s_abs_i32 s1, s1
+; SDAG1250-NEXT:    s_abs_i32 s0, s0
+; SDAG1250-NEXT:    s_lshl_b32 s1, s1, 8
+; SDAG1250-NEXT:    s_sext_i32_i16 s2, s2
+; SDAG1250-NEXT:    s_or_b32 s0, s0, s1
+; SDAG1250-NEXT:    s_abs_i32 s2, s2
+; SDAG1250-NEXT:    s_and_b32 s1, s0, 0xffff
+; SDAG1250-NEXT:    s_lshl_b32 s3, s2, 16
+; SDAG1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; SDAG1250-NEXT:    s_or_b32 s1, s1, s3
+; SDAG1250-NEXT:    s_lshr_b32 s1, s1, 8
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX-LABEL: abs_sgpr_v3i8:
 ; GFX:       ; %bb.0:
 ; GFX-NEXT:    s_sext_i32_i8 s0, s0
@@ -354,6 +987,86 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
 }
 
 define <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
+; SDAG6-LABEL: abs_vgpr_v3i8:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v3, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v3, v1
+; SDAG6-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG6-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
+; SDAG6-NEXT:    v_max_i32_e32 v2, v1, v2
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG6-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v3i8:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v3, 0
+; SDAG8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v1, sext(v1), v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG8-NEXT:    v_sub_u16_sdwa v1, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG8-NEXT:    v_max_i16_sdwa v2, sext(v2), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SDAG8-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDAG8-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v3i8:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG10-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; SDAG10-NEXT:    v_sub_nc_u16 v3, 0, v1
+; SDAG10-NEXT:    v_sub_nc_u16 v4, 0, v0
+; SDAG10-NEXT:    v_sub_nc_u16 v5, 0, v2
+; SDAG10-NEXT:    v_max_i16 v1, v1, v3
+; SDAG10-NEXT:    v_max_i16 v0, v0, v4
+; SDAG10-NEXT:    v_max_i16 v2, v2, v5
+; SDAG10-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG10-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
+; SDAG10-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG10-NEXT:    v_or_b32_sdwa v1, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDAG10-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v3i8:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; SDAG1250-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; SDAG1250-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v1
+; SDAG1250-NEXT:    v_sub_nc_u16 v4, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; SDAG1250-NEXT:    v_max_i16 v1, v1, v3
+; SDAG1250-NEXT:    v_sub_nc_u16 v3, 0, v2
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; SDAG1250-NEXT:    v_max_i16 v0, v0, v4
+; SDAG1250-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_max_i16 v2, v2, v3
+; SDAG1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v2 :: v_dual_bitop2_b32 v0, v0, v1 bitop3:0x54
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_and_b32_e32 v3, 0xffff, v0
+; SDAG1250-NEXT:    v_or_b32_e32 v1, v3, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v3i8:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -416,6 +1129,41 @@ define <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
 }
 
 define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v2i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_lshl_b32 s2, s1, 16
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_or_b32 s0, s0, s2
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v2i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_ashr_i32 s1, s0, 16
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_lshl_b32 s1, s1, 16
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_or_b32 s0, s0, s1
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v2i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG10-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG10-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v2i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX6-LABEL: abs_sgpr_v2i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_sext_i32_i16 s0, s0
@@ -458,6 +1206,46 @@ define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
 }
 
 define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
+; SDAG6-LABEL: abs_vgpr_v2i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; SDAG6-NEXT:    v_max_i32_e32 v0, v2, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v2, v1
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v2i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG8-NEXT:    v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDAG8-NEXT:    v_sub_u16_e32 v2, 0, v0
+; SDAG8-NEXT:    v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v2
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v2i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_pk_sub_i16 v1, 0, v0
+; SDAG10-NEXT:    v_pk_max_i16 v0, v0, v1
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v2i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_pk_sub_i16 v1, 0, v0
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, v0, v1
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v2i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -500,6 +1288,55 @@ define <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
 }
 
 define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
+; SDAG6-LABEL: abs_sgpr_v3i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i16 s3, s2
+; SDAG6-NEXT:    s_abs_i32 s1, s1
+; SDAG6-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG6-NEXT:    s_lshl_b32 s2, s1, 16
+; SDAG6-NEXT:    s_abs_i32 s3, s3
+; SDAG6-NEXT:    s_abs_i32 s0, s0
+; SDAG6-NEXT:    s_lshr_b64 s[4:5], s[2:3], 16
+; SDAG6-NEXT:    s_or_b32 s0, s0, s2
+; SDAG6-NEXT:    s_mov_b32 s1, s4
+; SDAG6-NEXT:    s_mov_b32 s2, s3
+; SDAG6-NEXT:    ; return to shader part epilog
+;
+; SDAG8-LABEL: abs_sgpr_v3i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_ashr_i32 s2, s0, 16
+; SDAG8-NEXT:    s_abs_i32 s2, s2
+; SDAG8-NEXT:    s_sext_i32_i16 s0, s0
+; SDAG8-NEXT:    s_sext_i32_i16 s1, s1
+; SDAG8-NEXT:    s_abs_i32 s0, s0
+; SDAG8-NEXT:    s_lshl_b32 s2, s2, 16
+; SDAG8-NEXT:    s_abs_i32 s1, s1
+; SDAG8-NEXT:    s_or_b32 s0, s0, s2
+; SDAG8-NEXT:    ; return to shader part epilog
+;
+; SDAG10-LABEL: abs_sgpr_v3i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG10-NEXT:    v_pk_sub_i16 v1, 0, s1
+; SDAG10-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG10-NEXT:    v_pk_max_i16 v1, s1, v1
+; SDAG10-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG10-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG10-NEXT:    ; return to shader part epilog
+;
+; SDAG1250-LABEL: abs_sgpr_v3i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    v_pk_sub_i16 v0, 0, s0
+; SDAG1250-NEXT:    v_pk_sub_i16 v1, 0, s1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, s0, v0
+; SDAG1250-NEXT:    v_pk_max_i16 v1, s1, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG1250-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG1250-NEXT:    ; return to shader part epilog
+;
 ; GFX6-LABEL: abs_sgpr_v3i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_sext_i32_i16 s0, s0
@@ -549,6 +1386,56 @@ define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
 }
 
 define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
+; SDAG6-LABEL: abs_vgpr_v3i16:
+; SDAG6:       ; %bb.0:
+; SDAG6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SDAG6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; SDAG6-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; SDAG6-NEXT:    v_max_i32_e32 v0, v3, v0
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; SDAG6-NEXT:    v_max_i32_e32 v1, v3, v1
+; SDAG6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
+; SDAG6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; SDAG6-NEXT:    v_max_i32_e32 v2, v3, v2
+; SDAG6-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG6-NEXT:    v_alignbit_b32 v1, v2, v1, 16
+; SDAG6-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG8-LABEL: abs_vgpr_v3i16:
+; SDAG8:       ; %bb.0:
+; SDAG8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG8-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG8-NEXT:    v_sub_u16_e32 v3, 0, v1
+; SDAG8-NEXT:    v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDAG8-NEXT:    v_max_i16_e32 v1, v1, v3
+; SDAG8-NEXT:    v_sub_u16_e32 v3, 0, v0
+; SDAG8-NEXT:    v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; SDAG8-NEXT:    v_max_i16_e32 v0, v0, v3
+; SDAG8-NEXT:    v_or_b32_e32 v0, v0, v2
+; SDAG8-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG10-LABEL: abs_vgpr_v3i16:
+; SDAG10:       ; %bb.0:
+; SDAG10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG10-NEXT:    v_pk_sub_i16 v2, 0, v0
+; SDAG10-NEXT:    v_pk_sub_i16 v3, 0, v1
+; SDAG10-NEXT:    v_pk_max_i16 v0, v0, v2
+; SDAG10-NEXT:    v_pk_max_i16 v1, v1, v3
+; SDAG10-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG1250-LABEL: abs_vgpr_v3i16:
+; SDAG1250:       ; %bb.0:
+; SDAG1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG1250-NEXT:    s_wait_kmcnt 0x0
+; SDAG1250-NEXT:    v_pk_sub_i16 v2, 0, v0
+; SDAG1250-NEXT:    v_pk_sub_i16 v3, 0, v1
+; SDAG1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG1250-NEXT:    v_pk_max_i16 v0, v0, v2
+; SDAG1250-NEXT:    v_pk_max_i16 v1, v1, v3
+; SDAG1250-NEXT:    s_set_pc_i64 s[30:31]
+;
 ; GFX6-LABEL: abs_vgpr_v3i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)

>From ef92dedeef2a1684faa1bb2828ff1e38fb2b99ac Mon Sep 17 00:00:00 2001
From: Patrick Simmons <patrick.simmons at amd.com>
Date: Thu, 6 Nov 2025 13:14:40 -0500
Subject: [PATCH 20/21] clang-format

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 85b8333010990..e95dbcf2bf7b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5305,7 +5305,7 @@ SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
   if (!IsNegative)
     return TruncResult;
 
-  return DAG.getNegative(TruncResult,DL,N->getValueType(0));
+  return DAG.getNegative(TruncResult, DL, N->getValueType(0));
 }
 
 SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,

>From 6548414f8e34924fb1ebf1ba12209acd54e9a7bf Mon Sep 17 00:00:00 2001
From: Patrick Simmons <patrick.simmons at amd.com>
Date: Fri, 14 Nov 2025 16:34:03 -0500
Subject: [PATCH 21/21] This appears optimal

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 768c0abd2e3f1..d39a5a7e03ab3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -974,7 +974,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                        Custom);
   }
 
-  setTargetDAGCombine({ISD::ADD,
+  setTargetDAGCombine({
+                       ISD::ABS,
+                       ISD::ADD,
                        ISD::PTRADD,
                        ISD::UADDO_CARRY,
                        ISD::SUB,
@@ -16854,6 +16856,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
     return SDValue();
 
+  // Lower uniform (non-divergent) i8 and i16 ABS nodes via expandABS.
+  if (N->getOpcode() == ISD::ABS && !N->isDivergent() &&
+      (N->getValueType(0) == MVT::i8 || N->getValueType(0) == MVT::i16))
+    return expandABS(N, DCI.DAG);
+
   switch (N->getOpcode()) {
   case ISD::ADD:
     return performAddCombine(N, DCI);



More information about the llvm-commits mailing list