[llvm] 703e9e9 - [AMDGPU][True16][CodeGen] true16 codegen for bswap (#122849)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 06:37:01 PST 2025
Author: Brox Chen
Date: 2025-01-17T09:36:55-05:00
New Revision: 703e9e97d937f3bb25d4318d86e357a665e72731
URL: https://github.com/llvm/llvm-project/commit/703e9e97d937f3bb25d4318d86e357a665e72731
DIFF: https://github.com/llvm/llvm-project/commit/703e9e97d937f3bb25d4318d86e357a665e72731.diff
LOG: [AMDGPU][True16][CodeGen] true16 codegen for bswap (#122849)
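Add True16 codegen patterns for bswap. The existing i16 bswap and zext(bswap) patterns are now restricted to the NotHasTrue16BitInsts and UseFakeTrue16Insts predicates, and new patterns under UseRealTrue16Insts select V_PERM_B32 on a VGPR_16 source. The bswap.ll test now runs gfx1100 with both +real-true16 and -real-true16.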
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/bswap.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1abbf4c217a697..40a20fa9cb15ea 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3041,6 +3041,8 @@ def : GCNPat <
// Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
// The 12s emit 0s.
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
def : GCNPat <
(i16 (bswap i16:$a)),
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
@@ -3050,6 +3052,19 @@ def : GCNPat <
(i32 (zext (bswap i16:$a))),
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
>;
+}
+
+let True16Predicate = UseRealTrue16Insts in {
+def : GCNPat <
+ (i16 (bswap i16:$a)),
+ (EXTRACT_SUBREG (V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001))), lo16)
+>;
+
+def : GCNPat <
+ (i32 (zext (bswap i16:$a))),
+ (V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001)))
+>;
+}
// Magic number: 1 | (0 << 8) | (3 << 16) | (2 << 24)
def : GCNPat <
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
index 30c8e94c9a27f5..a95a1aba0c9141 100644
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-- -verify-machineinstrs | FileCheck %s --check-prefix=SI
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16
declare i16 @llvm.bswap.i16(i16) nounwind readnone
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone
@@ -490,13 +491,21 @@ define float @missing_truncate_promote_bswap(i32 %arg) {
; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: missing_truncate_promote_bswap:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-REAL16-LABEL: missing_truncate_promote_bswap:
+; GFX11-REAL16: ; %bb.0: ; %bb
+; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-REAL16-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
+; GFX11-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-REAL16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: missing_truncate_promote_bswap:
+; GFX11-FAKE16: ; %bb.0: ; %bb
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
bb:
%tmp = trunc i32 %arg to i16
%tmp1 = call i16 @llvm.bswap.i16(i16 %tmp)
More information about the llvm-commits mailing list