[llvm] a3b3c02 - [AMDGPU][NFC] Pre-commit tests for readanylane combines (#172398)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 16 08:25:33 PST 2025
Author: vangthao95
Date: 2025-12-16T08:25:29-08:00
New Revision: a3b3c027bb61f5275087335cf2c9b1cfe4ac9768
URL: https://github.com/llvm/llvm-project/commit/a3b3c027bb61f5275087335cf2c9b1cfe4ac9768
DIFF: https://github.com/llvm/llvm-project/commit/a3b3c027bb61f5275087335cf2c9b1cfe4ac9768.diff
LOG: [AMDGPU][NFC] Pre-commit tests for readanylane combines (#172398)
Added:
Modified:
llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
index 5f72d3e2ab161..c035bd0ecfdec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
@@ -143,3 +143,44 @@ define amdgpu_ps float @unmerge_readanylane_merge_extract_bitcast_to_physical_vg
%bitcast = bitcast <2 x i16> %extracted to float
ret float %bitcast
}
+
+define amdgpu_ps void @op_readanylanes_merge_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
+; CHECK-LABEL: op_readanylanes_merge_to_virtual_vgpr:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; CHECK-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: v_readfirstlane_b32 s1, v1
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; CHECK-NEXT: s_endpgm
+ %load = load volatile <2 x float>, ptr addrspace(1) %ptr0
+ %fneg = fneg <2 x float> %load
+ store <2 x float> %fneg, ptr addrspace(1) %ptr1
+ ret void
+}
+
+define amdgpu_ps void @op_readanylanes_merge_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) {
+; CHECK-LABEL: op_readanylanes_merge_bitcast_to_virtual_vgpr:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; CHECK-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: v_readfirstlane_b32 s1, v1
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; CHECK-NEXT: s_endpgm
+ %load = load volatile <2 x float>, ptr addrspace(1) %ptr0
+ %fneg = fneg <2 x float> %load
+ %bitcast = bitcast <2 x float> %fneg to double
+ store double %bitcast, ptr addrspace(1) %ptr1
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
index 1c1cda2157c9f..64276baad4de1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
@@ -345,3 +345,169 @@ body: |
SI_RETURN_TO_EPILOG implicit $vgpr0
...
+---
+name: op_readanylanes_merge_to_virtual_vgpr
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: op_readanylanes_merge_to_virtual_vgpr
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: G_STORE [[COPY4]](<2 x s32>), [[MV1]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32)
+ %3:sgpr(s32) = COPY $sgpr2
+ %4:sgpr(s32) = COPY $sgpr3
+ %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32)
+ %6:sgpr(<2 x s32>) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ %7:sgpr(s32), %8:sgpr(s32) = G_UNMERGE_VALUES %6(<2 x s32>)
+ %9:sgpr(s32) = G_FNEG %7
+ %10:sgpr(s32) = G_FNEG %8
+ %11:sgpr(<2 x s32>) = G_BUILD_VECTOR %9(s32), %10(s32)
+ G_STORE %11(<2 x s32>), %5(p1) :: (store (<2 x s32>), addrspace 1)
+ S_ENDPGM 0
+...
+
+---
+name: op_readanylanes_merge_to_physical_vgpr
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: op_readanylanes_merge_to_physical_vgpr
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32)
+ %3:sgpr(s32) = COPY $sgpr2
+ %4:sgpr(s32) = COPY $sgpr3
+ %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32)
+ %6:sgpr(<2 x s32>) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ %7:sgpr(s32), %8:sgpr(s32) = G_UNMERGE_VALUES %6(<2 x s32>)
+ %9:sgpr(s32) = G_FNEG %7
+ %10:sgpr(s32) = G_FNEG %8
+ %11:sgpr(<2 x s32>) = G_BUILD_VECTOR %9(s32), %10(s32)
+ $vgpr0_vgpr1 = COPY %11(<2 x s32>)
+ SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+...
+
+---
+name: op_readanylanes_merge_bitcast_to_virtual_vgpr
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: op_readanylanes_merge_bitcast_to_virtual_vgpr
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[BITCAST]](s64)
+ ; CHECK-NEXT: G_STORE [[COPY4]](s64), [[MV1]](p1) :: (store (s64), addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32)
+ %3:sgpr(s32) = COPY $sgpr2
+ %4:sgpr(s32) = COPY $sgpr3
+ %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32)
+ %6:sgpr(<2 x s32>) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ %7:sgpr(s32), %8:sgpr(s32) = G_UNMERGE_VALUES %6(<2 x s32>)
+ %9:sgpr(s32) = G_FNEG %7
+ %10:sgpr(s32) = G_FNEG %8
+ %11:sgpr(<2 x s32>) = G_BUILD_VECTOR %9(s32), %10(s32)
+ %12:sgpr(s64) = G_BITCAST %11(<2 x s32>)
+ G_STORE %12(s64), %5(p1) :: (store (s64), addrspace 1)
+ S_ENDPGM 0
+...
+
+---
+name: op_readanylanes_merge_bitcast_to_physical_vgpr
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: op_readanylanes_merge_bitcast_to_physical_vgpr
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
+ ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64)
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32)
+ %3:sgpr(s32) = COPY $sgpr2
+ %4:sgpr(s32) = COPY $sgpr3
+ %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32)
+ %6:sgpr(<2 x s32>) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
+ %7:sgpr(s32), %8:sgpr(s32) = G_UNMERGE_VALUES %6(<2 x s32>)
+ %9:sgpr(s32) = G_FNEG %7
+ %10:sgpr(s32) = G_FNEG %8
+ %11:sgpr(<2 x s32>) = G_BUILD_VECTOR %9(s32), %10(s32)
+ %12:sgpr(s64) = G_BITCAST %11(<2 x s32>)
+ $vgpr0_vgpr1 = COPY %12(s64)
+ SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+...
+
More information about the llvm-commits
mailing list