[llvm] 60fea27 - AMDGPU/GlobalISel: Improve 16-bit bswap
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 14 15:57:53 PST 2020
Author: Matt Arsenault
Date: 2020-02-14T15:57:39-08:00
New Revision: 60fea2713d3f37d70383aacaa75f61344cc3234a
URL: https://github.com/llvm/llvm-project/commit/60fea2713d3f37d70383aacaa75f61344cc3234a
DIFF: https://github.com/llvm/llvm-project/commit/60fea2713d3f37d70383aacaa75f61344cc3234a.diff
LOG: AMDGPU/GlobalISel: Improve 16-bit bswap
Match the new DAG behavior and use v_perm_b32 when available. Also
does better on SI/CI by expanding 16-bit swaps. Also fixes
non-power-of-2 cases.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 1cf95c5b522f..c56c69d723b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -598,12 +598,21 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(0, 32)
.widenScalarToNextPow2(1, 32);
- getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE})
+ getActionDefinitionsBuilder(G_BITREVERSE)
.legalFor({S32})
.clampScalar(0, S32, S32)
.scalarize(0);
if (ST.has16BitInsts()) {
+ getActionDefinitionsBuilder(G_BSWAP)
+ .legalFor({S16, S32, V2S16})
+ .clampMaxNumElements(0, S16, 2)
+ // FIXME: Fixing non-power-of-2 before clamp is workaround for
+ // narrowScalar limitation.
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, S16, S32)
+ .scalarize(0);
+
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({S32, S16, V2S16})
@@ -620,6 +629,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
} else {
+ // TODO: Should have same legality without v_perm_b32
+ getActionDefinitionsBuilder(G_BSWAP)
+ .legalFor({S32})
+ .lowerIf(narrowerThan(0, 32))
+ // FIXME: Fixing non-power-of-2 before clamp is workaround for
+ // narrowScalar limitation.
+ .widenScalarToNextPow2(0)
+ .maxScalar(0, S32)
+ .scalarize(0)
+ .lower();
+
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({S32})
.clampScalar(0, S32, S32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
index a374369478d1..b14b984ff9ac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
@@ -341,32 +341,26 @@ define <2 x i64> @v_bswap_v2i64(<2 x i64> %src) {
define amdgpu_ps i16 @s_bswap_i16(i16 inreg %src) {
; GFX7-LABEL: s_bswap_i16:
; GFX7: ; %bb.0:
-; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8
-; GFX7-NEXT: v_alignbit_b32 v1, s0, s0, 24
-; GFX7-NEXT: s_mov_b32 s0, 0xff00ff
-; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0
-; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_readfirstlane_b32 s0, v0
+; GFX7-NEXT: s_lshl_b32 s1, s0, 8
+; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT: s_lshr_b32 s0, s0, 8
+; GFX7-NEXT: s_or_b32 s0, s0, s1
+; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_bswap_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: s_mov_b32 s0, 0x10203
+; GFX8-NEXT: s_mov_b32 s0, 0xc0c0001
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff
-; GFX8-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_bswap_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: s_mov_b32 s0, 0x10203
+; GFX9-NEXT: s_mov_b32 s0, 0xc0c0001
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
-; GFX9-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%bswap = call i16 @llvm.bswap.i16(i16 %src)
@@ -380,27 +374,24 @@ define i16 @v_bswap_i16(i16 %src) {
; GFX7-LABEL: v_bswap_i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8
-; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
-; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
-; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
+; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bswap_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, 0x10203
+; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_bswap_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0x10203
+; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%bswap = call i16 @llvm.bswap.i16(i16 %src)
ret i16 %bswap
@@ -409,51 +400,34 @@ define i16 @v_bswap_i16(i16 %src) {
define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) {
; GFX7-LABEL: s_bswap_v2i16:
; GFX7: ; %bb.0:
-; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8
-; GFX7-NEXT: v_alignbit_b32 v1, s0, s0, 24
-; GFX7-NEXT: s_mov_b32 s0, 0xff00ff
-; GFX7-NEXT: v_bfi_b32 v0, s0, v1, v0
-; GFX7-NEXT: v_alignbit_b32 v1, s1, s1, 8
-; GFX7-NEXT: v_alignbit_b32 v2, s1, s1, 24
-; GFX7-NEXT: v_bfi_b32 v1, s0, v2, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX7-NEXT: s_mov_b32 s0, 0xffff
-; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX7-NEXT: v_and_b32_e32 v1, s0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7-NEXT: v_readfirstlane_b32 s0, v0
+; GFX7-NEXT: s_mov_b32 s3, 0xffff
+; GFX7-NEXT: s_lshl_b32 s2, s0, 8
+; GFX7-NEXT: s_and_b32 s0, s0, s3
+; GFX7-NEXT: s_lshr_b32 s0, s0, 8
+; GFX7-NEXT: s_or_b32 s0, s0, s2
+; GFX7-NEXT: s_lshl_b32 s2, s1, 8
+; GFX7-NEXT: s_and_b32 s1, s1, s3
+; GFX7-NEXT: s_lshr_b32 s1, s1, 8
+; GFX7-NEXT: s_or_b32 s1, s1, s2
+; GFX7-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX7-NEXT: s_lshl_b32 s1, s1, 16
+; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_bswap_v2i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: s_mov_b32 s0, 0x10203
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: s_mov_b32 s0, 0x2030001
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX8-NEXT: v_perm_b32 v1, 0, v1, s0
-; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_bswap_v2i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_lshr_b32 s1, s0, 16
; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: s_mov_b32 s0, 0x10203
-; GFX9-NEXT: v_mov_b32_e32 v1, s1
-; GFX9-NEXT: v_perm_b32 v1, 0, v1, s0
+; GFX9-NEXT: s_mov_b32 s0, 0x2030001
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s0
-; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
%bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src)
@@ -466,30 +440,25 @@ define i32 @v_bswap_i16_zext_to_i32(i16 %src) {
; GFX7-LABEL: v_bswap_i16_zext_to_i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8
-; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
-; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
-; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
+; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX7-NEXT: v_bfe_u32 v0, v0, 0, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bswap_i16_zext_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, 0x10203
+; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff
-; GFX8-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_bswap_i16_zext_to_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0x10203
+; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
-; GFX9-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: s_setpc_b64 s[30:31]
%bswap = call i16 @llvm.bswap.i16(i16 %src)
%zext = zext i16 %bswap to i32
@@ -500,29 +469,26 @@ define i32 @v_bswap_i16_sext_to_i32(i16 %src) {
; GFX7-LABEL: v_bswap_i16_sext_to_i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_alignbit_b32 v1, v0, v0, 8
-; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
-; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
-; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
+; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bswap_i16_sext_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, 0x10203
+; GFX8-NEXT: s_mov_b32 s4, 0xc0c0001
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_bswap_i16_sext_to_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0x10203
+; GFX9-NEXT: s_mov_b32 s4, 0xc0c0001
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX9-NEXT: s_setpc_b64 s[30:31]
%bswap = call i16 @llvm.bswap.i16(i16 %src)
@@ -534,42 +500,29 @@ define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) {
; GFX7-LABEL: v_bswap_v2i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_alignbit_b32 v2, v0, v0, 8
-; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
-; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
-; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v2
-; GFX7-NEXT: v_alignbit_b32 v2, v1, v1, 8
-; GFX7-NEXT: v_alignbit_b32 v1, v1, v1, 24
-; GFX7-NEXT: v_bfi_b32 v1, s4, v1, v2
-; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX7-NEXT: s_mov_b32 s4, 0xffff
+; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v0
+; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v1
+; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v1
+; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bswap_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT: s_mov_b32 s4, 0x10203
+; GFX8-NEXT: s_mov_b32 s4, 0x2030001
; GFX8-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4
-; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_bswap_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x10203
-; GFX9-NEXT: v_perm_b32 v1, 0, v1, s4
+; GFX9-NEXT: s_mov_b32 s4, 0x2030001
; GFX9-NEXT: v_perm_b32 v0, 0, v0, s4
-; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%bswap = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %src)
ret <2 x i16> %bswap
@@ -581,6 +534,46 @@ define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) {
; ret <3 x i16> %bswap
; }
+define i64 @v_bswap_i48(i64 %src) {
+; GFX7-LABEL: v_bswap_i48:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_alignbit_b32 v2, v1, v1, 8
+; GFX7-NEXT: v_alignbit_b32 v1, v1, v1, 24
+; GFX7-NEXT: s_mov_b32 s4, 0xff00ff
+; GFX7-NEXT: v_bfi_b32 v1, s4, v1, v2
+; GFX7-NEXT: v_alignbit_b32 v2, v0, v0, 8
+; GFX7-NEXT: v_alignbit_b32 v0, v0, v0, 24
+; GFX7-NEXT: v_bfi_b32 v2, s4, v0, v2
+; GFX7-NEXT: v_lshr_b64 v[0:1], v[1:2], 16
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bswap_i48:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 s4, 0x10203
+; GFX8-NEXT: v_perm_b32 v1, 0, v1, s4
+; GFX8-NEXT: v_perm_b32 v2, 0, v0, s4
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_bswap_i48:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0x10203
+; GFX9-NEXT: v_perm_b32 v1, 0, v1, s4
+; GFX9-NEXT: v_perm_b32 v2, 0, v0, s4
+; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[1:2]
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %trunc = trunc i64 %src to i48
+ %bswap = call i48 @llvm.bswap.i48(i48 %trunc)
+ %zext = zext i48 %bswap to i64
+ ret i64 %zext
+}
+
declare i32 @llvm.amdgcn.readfirstlane(i32) #0
declare i16 @llvm.bswap.i16(i16) #1
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1
@@ -589,6 +582,7 @@ declare i32 @llvm.bswap.i32(i32) #1
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) #1
declare i64 @llvm.bswap.i64(i64) #1
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #1
+declare i48 @llvm.bswap.i48(i48) #1
attributes #0 = { convergent nounwind readnone }
attributes #1 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
index a7c4773c20d1..2939c599646a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s
---
name: bswap_s8
@@ -7,14 +8,32 @@ name: bswap_s8
body: |
bb.0:
liveins: $vgpr0
- ; CHECK-LABEL: name: bswap_s8
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+ ; GFX7-LABEL: name: bswap_s8
+ ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32)
+ ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+ ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND1]](s32)
+ ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
+ ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
+ ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; GFX7: $vgpr0 = COPY [[COPY7]](s32)
+ ; GFX8-LABEL: name: bswap_s8
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]]
+ ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[BSWAP]], [[C]](s16)
+ ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
+ ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s8) = G_TRUNC %0
%2:_(s8) = G_BSWAP %1
@@ -28,14 +47,27 @@ name: bswap_s16
body: |
bb.0:
liveins: $vgpr0
- ; CHECK-LABEL: name: bswap_s16
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+ ; GFX7-LABEL: name: bswap_s16
+ ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
+ ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+ ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+ ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
+ ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX8-LABEL: name: bswap_s16
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]]
+ ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BSWAP]](s16)
+ ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s16) = G_BSWAP %1
@@ -49,14 +81,32 @@ name: bswap_s24
body: |
bb.0:
liveins: $vgpr0
- ; CHECK-LABEL: name: bswap_s24
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+ ; GFX7-LABEL: name: bswap_s24
+ ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+ ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32)
+ ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+ ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND1]](s32)
+ ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
+ ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
+ ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; GFX7: $vgpr0 = COPY [[COPY7]](s32)
+ ; GFX8-LABEL: name: bswap_s24
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
+ ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
+ ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX8: $vgpr0 = COPY [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s24) = G_TRUNC %0
%2:_(s24) = G_BSWAP %1
@@ -70,10 +120,14 @@ name: bswap_s32
body: |
bb.0:
liveins: $vgpr0
- ; CHECK-LABEL: name: bswap_s32
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
- ; CHECK: $vgpr0 = COPY [[BSWAP]](s32)
+ ; GFX7-LABEL: name: bswap_s32
+ ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
+ ; GFX7: $vgpr0 = COPY [[BSWAP]](s32)
+ ; GFX8-LABEL: name: bswap_s32
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
+ ; GFX8: $vgpr0 = COPY [[BSWAP]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_BSWAP %0
$vgpr0 = COPY %1
@@ -85,45 +139,259 @@ name: bswap_v2s16
body: |
bb.0:
liveins: $vgpr0
- ; CHECK-LABEL: name: bswap_v2s16
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[COPY3]]
- ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C]](s32)
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
- ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX7-LABEL: name: bswap_v2s16
+ ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32)
+ ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+ ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
+ ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+ ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+ ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32)
+ ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]]
+ ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; GFX7: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; GFX7: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX7: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; GFX8-LABEL: name: bswap_v2s16
+ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[COPY]]
+ ; GFX8: $vgpr0 = COPY [[BSWAP]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = G_BSWAP %0
$vgpr0 = COPY %1
...
+---
+name: bswap_v3s16
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX7-LABEL: name: bswap_v3s16
+ ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+ ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+ ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY4]](s32)
+ ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
+ ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32)
+ ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+ ; GFX7: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+ ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY8]](s32)
+ ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]]
+ ; GFX7: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[COPY10]](s32)
+ ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+ ; GFX7: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX7: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+ ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY12]](s32)
+ ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX7: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]]
+ ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX7: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+ ; GFX7: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+ ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX7: $vgpr1 = COPY [[ANYEXT1]](s32)
+ ; GFX7: $vgpr2 = COPY [[ANYEXT2]](s32)
+ ; GFX8-LABEL: name: bswap_v3s16
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]]
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
+ ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+ ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+ ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+ ; GFX8: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[EXTRACT2]]
+ ; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+ ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[BSWAP]](<2 x s16>), 0
+ ; GFX8: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
+ ; GFX8: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
+ ; GFX8: [[BSWAP1:%[0-9]+]]:_(s16) = G_BSWAP [[COPY6]]
+ ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT3]](<3 x s16>), 0
+ ; GFX8: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT3]], [[BSWAP1]](s16), 32
+ ; GFX8: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT4]](<4 x s16>), 0
+ ; GFX8: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT4]](<3 x s16>), 0
+ ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>)
+ ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX8: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX8: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX8: $vgpr0 = COPY [[COPY7]](s32)
+ ; GFX8: $vgpr1 = COPY [[COPY8]](s32)
+ ; GFX8: $vgpr2 = COPY [[COPY9]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s16) = G_TRUNC %0
+ %4:_(s16) = G_TRUNC %1
+ %5:_(s16) = G_TRUNC %2
+
+ %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5
+ %7:_(<3 x s16>) = G_BSWAP %6
+ %8:_(s16), %9:_(s16), %10:_(s16) = G_UNMERGE_VALUES %7
+ %11:_(s32) = G_ANYEXT %8
+ %12:_(s32) = G_ANYEXT %9
+ %13:_(s32) = G_ANYEXT %10
+ $vgpr0 = COPY %11
+ $vgpr1 = COPY %12
+ $vgpr2 = COPY %13
+...
+
+---
+name: bswap_v4s16
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; GFX7-LABEL: name: bswap_v4s16
+ ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+ ; GFX7: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX7: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32)
+ ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX7: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX7: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
+ ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX7: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]]
+ ; GFX7: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+ ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+ ; GFX7: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX7: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; GFX7: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32)
+ ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX7: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]]
+ ; GFX7: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX7: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[COPY8]](s32)
+ ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+ ; GFX7: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX7: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]]
+ ; GFX7: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY10]](s32)
+ ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
+ ; GFX7: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]]
+ ; GFX7: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX7: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[COPY12]](s32)
+ ; GFX7: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+ ; GFX7: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX7: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX7: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]]
+ ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY14]](s32)
+ ; GFX7: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
+ ; GFX7: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]]
+ ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; GFX7: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; GFX7: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
+ ; GFX7: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; GFX7: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
+ ; GFX7: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+ ; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ; GFX8-LABEL: name: bswap_v4s16
+ ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+ ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV]]
+ ; GFX8: [[BSWAP1:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[UV1]]
+ ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BSWAP]](<2 x s16>), [[BSWAP1]](<2 x s16>)
+ ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+ %1:_(<4 x s16>) = G_BSWAP %0
+ $vgpr0_vgpr1 = COPY %1
+...
+
---
name: bswap_v2s32
body: |
bb.0:
liveins: $vgpr0_vgpr1
- ; CHECK-LABEL: name: bswap_v2s32
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
- ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX7-LABEL: name: bswap_v2s32
+ ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+ ; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+ ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX8-LABEL: name: bswap_v2s32
+ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+ ; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+ ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_BSWAP %0
$vgpr0_vgpr1 = COPY %1
@@ -135,14 +403,58 @@ name: bswap_s64
body: |
bb.0:
liveins: $vgpr0_vgpr1
- ; CHECK-LABEL: name: bswap_s64
- ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
- ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; GFX7-LABEL: name: bswap_s64
+ ; GFX7: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+ ; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+ ; GFX7: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ; GFX8-LABEL: name: bswap_s64
+ ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+ ; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+ ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_BSWAP %0
$vgpr0_vgpr1 = COPY %1
...
+
+---
+name: bswap_v2s64
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX7-LABEL: name: bswap_v2s64
+ ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX7: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+ ; GFX7: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]]
+ ; GFX7: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]]
+ ; GFX7: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+ ; GFX7: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]]
+ ; GFX7: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]]
+ ; GFX7: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32)
+ ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ; GFX8-LABEL: name: bswap_v2s64
+ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV3]]
+ ; GFX8: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV2]]
+ ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+ ; GFX8: [[BSWAP2:%[0-9]+]]:_(s32) = G_BSWAP [[UV5]]
+ ; GFX8: [[BSWAP3:%[0-9]+]]:_(s32) = G_BSWAP [[UV4]]
+ ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32)
+ ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+ ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(<2 x s64>) = G_BSWAP %0
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
More information about the llvm-commits
mailing list