[llvm] AMDGPU: Expand shuffle testing with generated tests (PR #123574)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 20 01:25:25 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Add some generated tests with every shuffle permutation
for relevant vector element types and sizes. Not sure if this
is going overboard with the number of tests. I pruned out the largest
cases (the 16- and 32-element vector cases are impractically large), and there's
redundancy when testing the pointer cases (at least for SelectionDAG).
This uses inline assembly to produce sample values because of how the
ABI is lowered when using a function argument. Since we break all
arguments into 32-bit pieces, a shuffle never ends up forming. We
need separate handling to reconstruct shuffles that involve physical
registers in ABI contexts.
I wrote a small tool to generate these, so I can easily change the
exact test body. Not sure if it's worth posting anywhere.
This is in preparation for making better use of v_pk_mov_b32,
v_mov_b64 and s_mov_b64 in shuffles.
---
Patch is 32.37 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123574.diff
81 Files Affected:
- (added) llvm/test/CodeGen/AMDGPU/legal-shuffle.v2f32.ll (+567)
- (added) llvm/test/CodeGen/AMDGPU/legal-shuffle.v2i32.ll (+2020)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v2bf16.ll (+2042)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v3bf16.ll (+4437)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v4bf16.ll (+7377)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v8bf16.ll (+27671)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v2f16.ll (+2042)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v3f16.ll (+4437)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v4f16.ll (+7377)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v8f16.ll (+27671)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll (+1875)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll (+4236)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll (+6929)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll (+25924)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v2i16.ll (+2021)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v3i16.ll (+4404)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v4i16.ll (+7263)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v8i16.ll (+27155)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll (+1875)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll (+4236)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll (+6929)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll (+25924)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll (+2104)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll (+4469)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll (+7547)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll (+31395)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll (+2104)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll (+4469)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll (+7547)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll (+1875)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll (+4236)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll (+6929)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll (+25924)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll (+4042)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll (+9009)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll (+15446)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll (+4042)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll (+9009)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll (+15446)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll (+4166)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll (+8883)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll (+15324)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v2i16.ll (+3964)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll (+8900)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll (+15161)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll (+4166)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll (+8883)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll (+15324)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll (+4508)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll (+9583)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll (+16611)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll (+4508)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll (+9583)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll (+16611)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll (+4166)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll (+8883)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll (+15324)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v2bf16.ll (+6535)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll (+14253)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll (+24202)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v2f16.ll (+6535)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll (+14253)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll (+24202)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll (+6422)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll (+14014)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll (+24149)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v2i16.ll (+6199)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll (+13983)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll (+23344)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll (+6434)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll (+14014)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll (+24149)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll (+7310)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll (+16014)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll (+27249)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll (+7310)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll (+16014)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll (+27249)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll (+6434)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll (+14014)
- (added) llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll (+24149)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/legal-shuffle.v2f32.ll b/llvm/test/CodeGen/AMDGPU/legal-shuffle.v2f32.ll
new file mode 100644
index 00000000000000..e6c155d71e9414
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/legal-shuffle.v2f32.ll
@@ -0,0 +1,567 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s
+
+define void @v_shuffle_v2f32_00(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_00:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v3, v2
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 0, i32 0>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_01(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_01:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 0, i32 1>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_02(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_02:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v2f32_02:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[4:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 0, i32 2>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_03(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_03:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v2f32_03:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[4:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 0, i32 3>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_10(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_10:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v2f32_10:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 1, i32 0>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_11(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_11:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 1, i32 1>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_12(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_12:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[4:5]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-NEXT: v_mov_b32_e32 v3, v4
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 1, i32 2>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_13(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_13:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[4:5]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 1, i32 3>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_20(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_20:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v2f32_20:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[4:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 2, i32 0>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_21(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_21:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[4:5]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 2, i32 1>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_22(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_22:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v3, v2
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 2, i32 2>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_23(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_23:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 2, i32 3>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_30(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_30:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v2f32_30:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[4:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 3, i32 0>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_31(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_31:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[4:5]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 3, i32 1>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_32(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v2f32_32:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 3, i32 2>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_33(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_33:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 3, i32 3>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_uu(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_uu:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> poison
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_0u(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_0u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 0, i32 poison>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_1u(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_1u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 1, i32 poison>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_2u(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_2u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 2, i32 poison>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_3u(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_3u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 3, i32 poison>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_u0(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_u0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v[0:1], v[1:2], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v2f32_u0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 poison, i32 0>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_u1(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: v_shuffle_v2f32_u1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def v[2:3]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val0 = call <2 x float> asm "; def $0", "=v"()
+ %val1 = call <2 x float> asm "; def $0", "=v"()
+ %shuffle = shufflevector <2 x float> %val0, <2 x float> %val1, <2 x i32> <i32 poison, i32 1>
+ store <2 x float> %shuffle, ptr addrspace(1) %ptr
+ ret void
+}
+
+define void @v_shuffle_v2f32_u2(ptr addrspace(1) %ptr) {
+; GFX900-LABEL: v_shuffle_v2f32_u2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ;;#A...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/123574
More information about the llvm-commits mailing list