[llvm] acdc503 - [AMDGPU][GlobalISel] Update applyMappingImpl for G_ABS and type v2s16

Mirko Brkusanin via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 2 03:30:53 PDT 2023


Author: Mirko Brkusanin
Date: 2023-08-02T12:27:06+02:00
New Revision: acdc503d6c51417c0171373ad65f42fc8fcbe5a1

URL: https://github.com/llvm/llvm-project/commit/acdc503d6c51417c0171373ad65f42fc8fcbe5a1
DIFF: https://github.com/llvm/llvm-project/commit/acdc503d6c51417c0171373ad65f42fc8fcbe5a1.diff

LOG: [AMDGPU][GlobalISel] Update applyMappingImpl for G_ABS and type v2s16

For G_ABS with type v2s16 and sgpr inputs, break the operation down into two
s32 G_ABS instructions.

Patch by: Acim Maravic

Differential Revision: https://reviews.llvm.org/D155867

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 1bb5a2786b8b0f..aa142903f941f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2404,6 +2404,18 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
     MachineFunction *MF = MBB->getParent();
     ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);
 
+    if (DstTy.isVector() && Opc == AMDGPU::G_ABS) {
+      Register WideSrcLo, WideSrcHi;
+
+      std::tie(WideSrcLo, WideSrcHi) =
+          unpackV2S16ToS32(B, MI.getOperand(1).getReg(), TargetOpcode::G_SEXT);
+      auto Lo = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcLo});
+      auto Hi = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcHi});
+      B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
+      MI.eraseFromParent();
+      return;
+    }
+
     if (DstTy.isVector()) {
       Register WideSrc0Lo, WideSrc0Hi;
       Register WideSrc1Lo, WideSrc1Hi;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index f07d2b83dbf31a..9480df5e83ba11 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -1,10 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
 ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
 
 declare i16 @llvm.abs.i16(i16, i1)
 declare i32 @llvm.abs.i32(i32, i1)
 declare i64 @llvm.abs.i64(i64, i1)
+declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
+declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
+declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
+declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
 declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
 
 define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
@@ -66,6 +71,13 @@ define amdgpu_cs i16 @abs_vgpr_i16(i16 %arg) {
 ; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
 ; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_sub_nc_u16 v1, 0, v0
+; GFX10-NEXT:    v_max_i16 v0, v0, v1
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    ; return to shader part epilog
   %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
   ret i16 %res
 }
@@ -84,6 +96,13 @@ define amdgpu_cs i32 @abs_vgpr_i32(i32 %arg) {
 ; GFX8-NEXT:    v_max_i32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 0, v0
+; GFX10-NEXT:    v_max_i32_e32 v0, v0, v1
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    ; return to shader part epilog
   %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
   ret i32 %res
 }
@@ -110,6 +129,17 @@ define amdgpu_cs i64 @abs_vgpr_i64(i64 %arg) {
 ; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX10-NEXT:    ; return to shader part epilog
   %res = call i64 @llvm.abs.i64(i64 %arg, i1 false)
   ret i64 %res
 }
@@ -146,6 +176,354 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
 ; GFX8-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX8-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_v4i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_sub_nc_u32_e32 v4, 0, v0
+; GFX10-NEXT:    v_sub_nc_u32_e32 v5, 0, v1
+; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 0, v2
+; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 0, v3
+; GFX10-NEXT:    v_max_i32_e32 v0, v0, v4
+; GFX10-NEXT:    v_max_i32_e32 v1, v1, v5
+; GFX10-NEXT:    v_max_i32_e32 v2, v2, v6
+; GFX10-NEXT:    v_max_i32_e32 v3, v3, v7
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX10-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX10-NEXT:    v_readfirstlane_b32 s3, v3
+; GFX10-NEXT:    ; return to shader part epilog
   %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false)
   ret <4 x i32> %res
 }
+
+define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
+; GFX6-LABEL: abs_sgpr_v2i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i8 s0, s0
+; GFX6-NEXT:    s_sext_i32_i8 s1, s1
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_abs_i32 s1, s1
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_v2i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_lshl_b32 s0, s0, 8
+; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
+; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_abs_i32 s1, s1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_v2i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i8 s0, s0
+; GFX10-NEXT:    s_sext_i32_i8 s1, s1
+; GFX10-NEXT:    s_sext_i32_i16 s0, s0
+; GFX10-NEXT:    s_sext_i32_i16 s1, s1
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_abs_i32 s1, s1
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
+  ret <2 x i8> %res
+}
+
+define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
+; GFX6-LABEL: abs_vgpr_v2i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_vgpr_v2i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
+; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 8, v0
+; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
+; GFX8-NEXT:    v_sub_u16_e32 v2, 0, v0
+; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 8, v1
+; GFX8-NEXT:    v_max_i16_e32 v0, v0, v2
+; GFX8-NEXT:    v_sub_u16_e32 v2, 0, v1
+; GFX8-NEXT:    v_max_i16_e32 v1, v1, v2
+; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_v2i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX10-NEXT:    v_sub_nc_u16 v2, 0, v0
+; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
+; GFX10-NEXT:    v_max_i16 v0, v0, v2
+; GFX10-NEXT:    v_max_i16 v1, v1, v3
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
+  ret <2 x i8> %res
+}
+
+define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
+; GFX6-LABEL: abs_sgpr_v3i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i8 s0, s0
+; GFX6-NEXT:    s_sext_i32_i8 s1, s1
+; GFX6-NEXT:    s_sext_i32_i8 s2, s2
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_abs_i32 s1, s1
+; GFX6-NEXT:    s_abs_i32 s2, s2
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_v3i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_lshl_b32 s0, s0, 8
+; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
+; GFX8-NEXT:    s_lshl_b32 s2, s2, 8
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_sext_i32_i16 s2, s2
+; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
+; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
+; GFX8-NEXT:    s_ashr_i32 s2, s2, 8
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_sext_i32_i16 s2, s2
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_abs_i32 s1, s1
+; GFX8-NEXT:    s_abs_i32 s2, s2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_v3i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i8 s0, s0
+; GFX10-NEXT:    s_sext_i32_i8 s1, s1
+; GFX10-NEXT:    s_sext_i32_i8 s2, s2
+; GFX10-NEXT:    s_sext_i32_i16 s0, s0
+; GFX10-NEXT:    s_sext_i32_i16 s1, s1
+; GFX10-NEXT:    s_sext_i32_i16 s2, s2
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_abs_i32 s1, s1
+; GFX10-NEXT:    s_abs_i32 s2, s2
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
+  ret <3 x i8> %res
+}
+
+define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8>  %arg) {
+; GFX6-LABEL: abs_vgpr_v3i8:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
+; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_vgpr_v3i8:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
+; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 8, v0
+; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
+; GFX8-NEXT:    v_sub_u16_e32 v3, 0, v0
+; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 8, v1
+; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
+; GFX8-NEXT:    v_max_i16_e32 v0, v0, v3
+; GFX8-NEXT:    v_sub_u16_e32 v3, 0, v1
+; GFX8-NEXT:    v_ashrrev_i16_e32 v2, 8, v2
+; GFX8-NEXT:    v_max_i16_e32 v1, v1, v3
+; GFX8-NEXT:    v_sub_u16_e32 v3, 0, v2
+; GFX8-NEXT:    v_max_i16_e32 v2, v2, v3
+; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX8-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_v3i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX10-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX10-NEXT:    v_bfe_i32 v2, v2, 0, 8
+; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v0
+; GFX10-NEXT:    v_sub_nc_u16 v4, 0, v1
+; GFX10-NEXT:    v_sub_nc_u16 v5, 0, v2
+; GFX10-NEXT:    v_max_i16 v0, v0, v3
+; GFX10-NEXT:    v_max_i16 v1, v1, v4
+; GFX10-NEXT:    v_max_i16 v2, v2, v5
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX10-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
+  ret <3 x i8> %res
+}
+
+define amdgpu_cs <2 x i16> @abs_sgpr_v2i16(<2 x i16> inreg %arg) {
+; GFX6-LABEL: abs_sgpr_v2i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i16 s0, s0
+; GFX6-NEXT:    s_sext_i32_i16 s1, s1
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_abs_i32 s1, s1
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_v2i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_abs_i32 s1, s1
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX8-NEXT:    s_or_b32 s0, s0, s1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_v2i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i16 s1, s0
+; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
+; GFX10-NEXT:    s_abs_i32 s1, s1
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s1, s0
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
+  ret <2 x i16> %res
+}
+
+define amdgpu_cs <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) {
+; GFX6-LABEL: abs_vgpr_v2i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v2
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v1
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v2
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_vgpr_v2i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-NEXT:    v_sub_u16_e32 v2, 0, v0
+; GFX8-NEXT:    v_sub_u16_e32 v3, 0, v1
+; GFX8-NEXT:    v_max_i16_e32 v0, v0, v2
+; GFX8-NEXT:    v_max_i16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_v2i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_pk_sub_i16 v1, 0, v0
+; GFX10-NEXT:    v_pk_max_i16 v0, v0, v1
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
+  ret <2 x i16> %res
+}
+
+define amdgpu_cs <3 x i16> @abs_sgpr_v3i16(<3 x i16> inreg %arg) {
+; GFX6-LABEL: abs_sgpr_v3i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_sext_i32_i16 s0, s0
+; GFX6-NEXT:    s_sext_i32_i16 s1, s1
+; GFX6-NEXT:    s_sext_i32_i16 s2, s2
+; GFX6-NEXT:    s_abs_i32 s0, s0
+; GFX6-NEXT:    s_abs_i32 s1, s1
+; GFX6-NEXT:    s_abs_i32 s2, s2
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_sgpr_v3i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX8-NEXT:    s_sext_i32_i16 s2, s2
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_abs_i32 s2, s2
+; GFX8-NEXT:    s_abs_i32 s0, s0
+; GFX8-NEXT:    s_sext_i32_i16 s1, s1
+; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT:    s_abs_i32 s1, s1
+; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
+; GFX8-NEXT:    s_or_b32 s0, s0, s2
+; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_sgpr_v3i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_sext_i32_i16 s2, s0
+; GFX10-NEXT:    s_ashr_i32 s0, s0, 16
+; GFX10-NEXT:    s_abs_i32 s2, s2
+; GFX10-NEXT:    s_abs_i32 s0, s0
+; GFX10-NEXT:    s_sext_i32_i16 s1, s1
+; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
+; GFX10-NEXT:    s_abs_i32 s1, s1
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
+  ret <3 x i16> %res
+}
+
+define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
+; GFX6-LABEL: abs_vgpr_v3i16:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v0
+; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; GFX6-NEXT:    v_max_i32_e32 v0, v0, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; GFX6-NEXT:    v_max_i32_e32 v1, v1, v3
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
+; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: abs_vgpr_v3i16:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX8-NEXT:    v_sub_u16_e32 v3, 0, v0
+; GFX8-NEXT:    v_sub_u16_e32 v4, 0, v2
+; GFX8-NEXT:    v_sub_u16_e32 v5, 0, v1
+; GFX8-NEXT:    v_max_i16_e32 v0, v0, v3
+; GFX8-NEXT:    v_max_i16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX8-NEXT:    v_max_i16_e32 v1, v1, v5
+; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: abs_vgpr_v3i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_pk_sub_i16 v2, 0, v0
+; GFX10-NEXT:    v_sub_nc_u16 v3, 0, v1
+; GFX10-NEXT:    v_pk_max_i16 v0, v0, v2
+; GFX10-NEXT:    v_max_i16 v1, v1, v3
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX10-NEXT:    ; return to shader part epilog
+  %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
+  ret <3 x i16> %res
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir
new file mode 100644
index 00000000000000..d3660e1f0a705d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - | FileCheck %s
+
+---
+name: abs_sgpr_s16
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: abs_sgpr_s16
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
+    ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32)
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s16) = G_TRUNC %1
+    %5:_(s16) = G_ABS %2
+...
+
+---
+name: abs_vgpr_s16
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: abs_vgpr_s16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[C]], [[TRUNC]]
+    ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s16) = G_SMAX [[TRUNC]], [[SUB]]
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s16) = G_TRUNC %1
+    %5:_(s16) = G_ABS %2
+...
+
+---
+name: abs_sgpr_v2i16
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: abs_sgpr_v2i16
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32)
+    ; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT_INREG]]
+    ; CHECK-NEXT: [[ABS1:%[0-9]+]]:sgpr(s32) = G_ABS [[ASHR]]
+    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ABS]](s32), [[ABS1]](s32)
+    %1:_(<2 x s16>) = COPY $sgpr0
+    %5:_(<2 x s16>) = G_ABS %1
+...
+
+---
+name: abs_vgpr_v2i16
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: abs_vgpr_v2i16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[BUILD_VECTOR]], [[COPY]]
+    ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[SUB]]
+    %1:_(<2 x s16>) = COPY $vgpr0
+    %5:_(<2 x s16>) = G_ABS %1
+...


        


More information about the llvm-commits mailing list