[llvm] [AMDGPU] Allocate i1 argument to SGPRs (PR #72461)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 06:43:19 PDT 2024
================
@@ -0,0 +1,506 @@
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX9 -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11 -enable-var-scope %s
+
+; Callee side of the plain (no extension attribute) i1 argument case.
+; The body stores %arg0 to an undef addrspace(1) pointer, which gives the
+; incoming argument a use.
+; Expected lowering after the IR translator:
+;   gfx900:  the argument is copied from the register pair $sgpr4_sgpr5 into
+;            an sreg_64 virtual register typed s1.
+;   gfx1100: the argument is copied from the single register $sgpr0 into an
+;            sreg_32 virtual register typed s1.
+define void @void_func_i1(i1 %arg0) {
+; GFX9-LABEL: name: void_func_i1
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: liveins: $sgpr4_sgpr5
+; GFX9-NEXT: {{ $}}
+; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s1) = COPY $sgpr4_sgpr5
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: G_STORE [[COPY]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: void_func_i1
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: liveins: $sgpr0
+; GFX11-NEXT: {{ $}}
+; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32(s1) = COPY $sgpr0
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: G_STORE [[COPY]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: SI_RETURN
+ store i1 %arg0, ptr addrspace(1) undef
+ ret void
+}
+
+; Caller side of the plain i1 case: loads an i1 from an undef addrspace(1)
+; pointer and passes it to @void_func_i1.
+; Expected lowering after the IR translator:
+;   gfx900:  the loaded s1 is copied into $sgpr0_sgpr1 before the call.
+;   gfx1100: the loaded s1 is copied into $sgpr0 before the call.
+; NOTE(review): for gfx900 the checks in @void_func_i1 read the argument from
+; $sgpr4_sgpr5, while the checks here write it to $sgpr0_sgpr1, and the next
+; two copies read and rewrite $sgpr0_sgpr1_sgpr2_sgpr3, whose name covers the
+; same registers. Confirm that caller and callee agree on the argument
+; register and that the wider copy cannot clobber the argument.
+define void @test_call_void_func_i1() {
+; GFX9-LABEL: name: test_call_void_func_i1
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX9-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1
+; GFX9-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](s1)
+; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
+; GFX9-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1, csr_amdgpu, implicit $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: test_call_void_func_i1
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX11-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1
+; GFX11-NEXT: $sgpr0 = COPY [[LOAD]](s1)
+; GFX11-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1, csr_amdgpu, implicit $sgpr0
+; GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX11-NEXT: SI_RETURN
+ %val = load i1, ptr addrspace(1) undef
+ call void @void_func_i1(i1 %val)
+ ret void
+}
+
+; Callee side of the `zeroext` i1 argument case.
+; The body zero-extends %arg0 to i32, adds 12 and stores the i32 result to an
+; undef addrspace(1) pointer.
+; Expected lowering after the IR translator: the argument arrives as an s1 in
+; $sgpr4_sgpr5 (gfx900, sreg_64) or $sgpr0 (gfx1100, sreg_32), followed by a
+; G_ZEXT to s32 and a G_ADD with the constant 12.
+define void @void_func_i1_zeroext(i1 zeroext %arg0) {
+; GFX9-LABEL: name: void_func_i1_zeroext
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: liveins: $sgpr4_sgpr5
+; GFX9-NEXT: {{ $}}
+; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s1) = COPY $sgpr4_sgpr5
+; GFX9-NEXT: [[CONST:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s1)
+; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[CONST]]
+; GFX9-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: void_func_i1_zeroext
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: liveins: $sgpr0
+; GFX11-NEXT: {{ $}}
+; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32(s1) = COPY $sgpr0
+; GFX11-NEXT: [[CONST:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s1)
+; GFX11-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[CONST]]
+; GFX11-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: SI_RETURN
+ %ext = zext i1 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, ptr addrspace(1) undef
+ ret void
+}
+
+; Caller side of the `zeroext` case: loads an i1 from an undef addrspace(1)
+; pointer and passes it to @void_func_i1_zeroext.
+; Expected lowering after the IR translator: the loaded s1 is copied directly
+; into $sgpr0_sgpr1 (gfx900) or $sgpr0 (gfx1100); no extension instruction is
+; expected on the caller side.
+; NOTE(review): the call below passes `i1 %val` without the `zeroext`
+; attribute that the callee's parameter declares, so the caller-side handling
+; of `zeroext` may not be exercised by this test. Confirm this is intentional.
+; NOTE(review): for gfx900 the callee's checks read $sgpr4_sgpr5 while the
+; checks here write $sgpr0_sgpr1, which the following copies of
+; $sgpr0_sgpr1_sgpr2_sgpr3 also cover. Confirm the registers agree.
+define void @test_call_void_func_i1_zeroext() {
+; GFX9-LABEL: name: test_call_void_func_i1_zeroext
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX9-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1_zeroext
+; GFX9-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](s1)
+; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
+; GFX9-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1_zeroext, csr_amdgpu, implicit $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: test_call_void_func_i1_zeroext
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX11-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1_zeroext
+; GFX11-NEXT: $sgpr0 = COPY [[LOAD]](s1)
+; GFX11-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1_zeroext, csr_amdgpu, implicit $sgpr0
+; GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX11-NEXT: SI_RETURN
+ %val = load i1, ptr addrspace(1) undef
+ call void @void_func_i1_zeroext(i1 %val)
+ ret void
+}
+
+; Callee side of the `signext` i1 argument case.
+; The body sign-extends %arg0 to i32, adds 12 and stores the i32 result to an
+; undef addrspace(1) pointer.
+; Expected lowering after the IR translator: the argument arrives as an s1 in
+; $sgpr4_sgpr5 (gfx900, sreg_64) or $sgpr0 (gfx1100, sreg_32), followed by a
+; G_SEXT to s32 and a G_ADD with the constant 12.
+define void @void_func_i1_signext(i1 signext %arg0) {
+; GFX9-LABEL: name: void_func_i1_signext
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: liveins: $sgpr4_sgpr5
+; GFX9-NEXT: {{ $}}
+; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s1) = COPY $sgpr4_sgpr5
+; GFX9-NEXT: [[CONST:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s1)
+; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[CONST]]
+; GFX9-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: void_func_i1_signext
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: liveins: $sgpr0
+; GFX11-NEXT: {{ $}}
+; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32(s1) = COPY $sgpr0
+; GFX11-NEXT: [[CONST:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s1)
+; GFX11-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[CONST]]
+; GFX11-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: SI_RETURN
+ %ext = sext i1 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, ptr addrspace(1) undef
+ ret void
+}
+
+; Caller side of the `signext` case: loads an i1 from an undef addrspace(1)
+; pointer and passes it to @void_func_i1_signext.
+; Expected lowering after the IR translator: the loaded s1 is copied directly
+; into $sgpr0_sgpr1 (gfx900) or $sgpr0 (gfx1100); no extension instruction is
+; expected on the caller side.
+; NOTE(review): the call below passes `i1 %val` without the `signext`
+; attribute that the callee's parameter declares, so the caller-side handling
+; of `signext` may not be exercised by this test. Confirm this is intentional.
+; NOTE(review): for gfx900 the callee's checks read $sgpr4_sgpr5 while the
+; checks here write $sgpr0_sgpr1, which the following copies of
+; $sgpr0_sgpr1_sgpr2_sgpr3 also cover. Confirm the registers agree.
+define void @test_call_void_func_i1_signext() {
+; GFX9-LABEL: name: test_call_void_func_i1_signext
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX9-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1_signext
+; GFX9-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](s1)
+; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
+; GFX9-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1_signext, csr_amdgpu, implicit $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: test_call_void_func_i1_signext
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX11-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1_signext
+; GFX11-NEXT: $sgpr0 = COPY [[LOAD]](s1)
+; GFX11-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1_signext, csr_amdgpu, implicit $sgpr0
+; GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX11-NEXT: SI_RETURN
+ %val = load i1, ptr addrspace(1) undef
+ call void @void_func_i1_signext(i1 %val)
+ ret void
+}
+
+; Callee side of the aggregate case: a [2 x i1] argument stored as a whole to
+; an undef addrspace(1) pointer.
+; Expected lowering after the IR translator: the array is split into two s1
+; values, each arriving in its own register ($sgpr4_sgpr5 and $sgpr6_sgpr7 on
+; gfx900; $sgpr0 and $sgpr1 on gfx1100). The first element is stored at the
+; base pointer and the second at base + 1 via a G_PTR_ADD.
+define void @void_func_a2i1([2 x i1] %arg0) {
+; GFX9-LABEL: name: void_func_a2i1
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
+; GFX9-NEXT: {{ $}}
+; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s1) = COPY $sgpr4_sgpr5
+; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_64(s1) = COPY $sgpr6_sgpr7
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: G_STORE [[COPY]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: [[CONST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+; GFX9-NEXT: [[PTRADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[CONST]](s64)
+; GFX9-NEXT: G_STORE [[COPY2]](s1), [[PTRADD]](p1) :: (store (s1) into `ptr addrspace(1) undef` + 1, addrspace 1)
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: void_func_a2i1
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: liveins: $sgpr0, $sgpr1
+; GFX11-NEXT: {{ $}}
+; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32(s1) = COPY $sgpr0
+; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s1) = COPY $sgpr1
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: G_STORE [[COPY]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: [[CONST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+; GFX11-NEXT: [[PTRADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[CONST]](s64)
+; GFX11-NEXT: G_STORE [[COPY2]](s1), [[PTRADD]](p1) :: (store (s1) into `ptr addrspace(1) undef` + 1, addrspace 1)
+; GFX11-NEXT: SI_RETURN
+ store [2 x i1] %arg0, ptr addrspace(1) undef
+ ret void
+}
+
+; Caller side of the aggregate case: builds the constant array
+; [i1 false, i1 true] with two insertvalue instructions and passes it to
+; @void_func_a2i1.
+; Expected lowering after the IR translator: each element becomes its own s1
+; constant and is copied to its own register ($sgpr0_sgpr1 and $sgpr2_sgpr3
+; on gfx900; $sgpr0 and $sgpr1 on gfx1100).
+; NOTE(review): for gfx900 the callee's checks read $sgpr4_sgpr5/$sgpr6_sgpr7
+; while the checks here write $sgpr0_sgpr1/$sgpr2_sgpr3, which the following
+; copies of $sgpr0_sgpr1_sgpr2_sgpr3 also cover. Confirm the registers agree.
+define void @test_call_void_func_a2i1() {
+; GFX9-LABEL: name: test_call_void_func_a2i1
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: [[CONST1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+; GFX9-NEXT: [[CONST2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX9-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_a2i1
+; GFX9-NEXT: $sgpr0_sgpr1 = COPY [[CONST1]](s1)
+; GFX9-NEXT: $sgpr2_sgpr3 = COPY [[CONST2]](s1)
+; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
+; GFX9-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_a2i1, csr_amdgpu, implicit $sgpr0_sgpr1, implicit $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: test_call_void_func_a2i1
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: [[CONST1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+; GFX11-NEXT: [[CONST2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+; GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX11-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_a2i1
+; GFX11-NEXT: $sgpr0 = COPY [[CONST1]](s1)
+; GFX11-NEXT: $sgpr1 = COPY [[CONST2]](s1)
+; GFX11-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_a2i1, csr_amdgpu, implicit $sgpr0, implicit $sgpr1
+; GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX11-NEXT: SI_RETURN
+ %1 = insertvalue [2 x i1] undef, i1 0, 0
+ %2 = insertvalue [2 x i1] %1, i1 1, 1
+ call void @void_func_a2i1([2 x i1] %2)
+ ret void
+}
+
+; Callee side of the two-argument case: two separate i1 arguments.
+; Both stores are volatile and target the same undef addrspace(1) pointer,
+; and the checks expect both to appear, in argument order.
+; Expected lowering after the IR translator: %arg0 and %arg1 arrive as s1
+; values in consecutive registers ($sgpr4_sgpr5 then $sgpr6_sgpr7 on gfx900;
+; $sgpr0 then $sgpr1 on gfx1100).
+define void @void_func_i1_i1(i1 %arg0, i1 %arg1) {
+; GFX9-LABEL: name: void_func_i1_i1
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
+; GFX9-NEXT: {{ $}}
+; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s1) = COPY $sgpr4_sgpr5
+; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_64(s1) = COPY $sgpr6_sgpr7
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: G_STORE [[COPY]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: G_STORE [[COPY2]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: void_func_i1_i1
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: liveins: $sgpr0, $sgpr1
+; GFX11-NEXT: {{ $}}
+; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32(s1) = COPY $sgpr0
+; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32(s1) = COPY $sgpr1
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: G_STORE [[COPY]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: G_STORE [[COPY2]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: SI_RETURN
+ store volatile i1 %arg0, ptr addrspace(1) undef
+ store volatile i1 %arg1, ptr addrspace(1) undef
+ ret void
+}
+
+; Caller side of the two-argument case: passes a loaded i1 as the first
+; argument and the constant `true` as the second to @void_func_i1_i1.
+; Expected lowering after the IR translator: the loaded value and the
+; constant are copied to consecutive registers ($sgpr0_sgpr1 then
+; $sgpr2_sgpr3 on gfx900; $sgpr0 then $sgpr1 on gfx1100).
+; NOTE(review): for gfx900 the callee's checks read $sgpr4_sgpr5/$sgpr6_sgpr7
+; while the checks here write $sgpr0_sgpr1/$sgpr2_sgpr3, which the following
+; copies of $sgpr0_sgpr1_sgpr2_sgpr3 also cover. Confirm the registers agree.
+define void @test_call_void_func_i1_i1() {
+; GFX9-LABEL: name: test_call_void_func_i1_i1
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX9-NEXT: [[CONST:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX9-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1_i1
+; GFX9-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](s1)
+; GFX9-NEXT: $sgpr2_sgpr3 = COPY [[CONST]](s1)
+; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
+; GFX9-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1_i1, csr_amdgpu, implicit $sgpr0_sgpr1, implicit $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX9-NEXT: SI_RETURN
+;
+; GFX11-LABEL: name: test_call_void_func_i1_i1
+; GFX11: bb.1 (%ir-block.0):
+; GFX11-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+; GFX11-NEXT: [[CONST:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1)
+; GFX11-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; GFX11-NEXT: [[GLOBAL:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_i1_i1
+; GFX11-NEXT: $sgpr0 = COPY [[LOAD]](s1)
+; GFX11-NEXT: $sgpr1 = COPY [[CONST]](s1)
+; GFX11-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GLOBAL]](p0), @void_func_i1_i1, csr_amdgpu, implicit $sgpr0, implicit $sgpr1
+; GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+; GFX11-NEXT: SI_RETURN
+ %val = load i1, ptr addrspace(1) undef
+ call void @void_func_i1_i1(i1 %val, i1 true)
+ ret void
+}
+
+define void @many_i1_args(
+ i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3, i1 %arg4, i1 %arg5, i1 %arg6, i1 %arg7,
+ i1 %arg8, i1 %arg9, i1 %arg10, i1 %arg11, i1 %arg12, i1 %arg13, i1 %arg14, i1 %arg15,
+ i1 %arg16, i1 %arg17, i1 %arg18, i1 %arg19, i1 %arg20, i1 %arg21, i1 %arg22, i1 %arg23,
+ i1 %arg24, i1 %arg25, i1 %arg26, i1 %arg27, i1 %arg28, i1 %arg29, i1 %arg30, i1 %arg31) {
+; GFX9-LABEL: name: many_i1_args
+; GFX9: bb.1 (%ir-block.0):
+; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr14_sgpr15, $sgpr16_sgpr17, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29
+; GFX9-NEXT: {{ $}}
+; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64(s1) = COPY $sgpr4_sgpr5
+; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64(s1) = COPY $sgpr6_sgpr7
+; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_64(s1) = COPY $sgpr8_sgpr9
+; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_64(s1) = COPY $sgpr10_sgpr11
+; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_64(s1) = COPY $sgpr12_sgpr13
+; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_64(s1) = COPY $sgpr14_sgpr15
+; GFX9-NEXT: [[COPY6:%[0-9]+]]:sreg_64(s1) = COPY $sgpr16_sgpr17
+; GFX9-NEXT: [[COPY7:%[0-9]+]]:sreg_64(s1) = COPY $sgpr18_sgpr19
+; GFX9-NEXT: [[COPY8:%[0-9]+]]:sreg_64(s1) = COPY $sgpr20_sgpr21
+; GFX9-NEXT: [[COPY9:%[0-9]+]]:sreg_64(s1) = COPY $sgpr22_sgpr23
+; GFX9-NEXT: [[COPY10:%[0-9]+]]:sreg_64(s1) = COPY $sgpr24_sgpr25
+; GFX9-NEXT: [[COPY11:%[0-9]+]]:sreg_64(s1) = COPY $sgpr26_sgpr27
+; GFX9-NEXT: [[COPY12:%[0-9]+]]:sreg_64(s1) = COPY $sgpr28_sgpr29
+; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s1) = G_TRUNC [[COPY13]](s32)
----------------
arsenm wrote:
We could try bitpacking, but this is probably the right choice.
https://github.com/llvm/llvm-project/pull/72461
More information about the llvm-commits
mailing list