[llvm] 1eb49bb - [GlobalISel][CallLowering] Use hasRetAttr for return flags on CallBases
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 28 19:38:44 PDT 2022
Author: Jessica Paquette
Date: 2022-09-28T19:38:24-07:00
New Revision: 1eb49bbab6b65182f651ea96cfe076ac0d9191d7
URL: https://github.com/llvm/llvm-project/commit/1eb49bbab6b65182f651ea96cfe076ac0d9191d7
DIFF: https://github.com/llvm/llvm-project/commit/1eb49bbab6b65182f651ea96cfe076ac0d9191d7.diff
LOG: [GlobalISel][CallLowering] Use hasRetAttr for return flags on CallBases
Given something like this:
```
declare signext i16 @signext_callee()
define i32 @caller() {
%res = call i16 @signext_callee()
...
}
```
CallLowering would miss that signext_callee's return value is sign extended,
because it isn't on the call.
Use hasRetAttr on the CallBase to allow us to catch this.
(This now inserts G_ASSERT_SEXT/G_ASSERT_ZEXT like in the original review.)
Differential Revision: https://reviews.llvm.org/D86228
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 997bd966cd22e..6b366aab8e4d4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -352,6 +352,9 @@ class CallLowering {
ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call,
unsigned ArgIdx) const;
+ /// \returns Flags corresponding to the attributes on the return from \p Call.
+ ISD::ArgFlagsTy getAttributesForReturn(const CallBase &Call) const;
+
/// Adds flags to \p Flags based off of the attributes in \p Attrs.
/// \p OpIdx is the index in \p Attrs to add flags from.
void addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 41ffa6473323e..b6a2c6a194c75 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -70,6 +70,15 @@ ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
return Flags;
}
+ISD::ArgFlagsTy
+CallLowering::getAttributesForReturn(const CallBase &Call) const {
+ ISD::ArgFlagsTy Flags;
+ addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) {
+ return Call.hasRetAttr(Attr);
+ });
+ return Flags;
+}
+
void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
const AttributeList &Attrs,
unsigned OpIdx) const {
@@ -141,7 +150,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
Register ReturnHintAlignReg;
Align ReturnHintAlign;
- Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}};
+ Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)};
if (!Info.OrigRet.Ty->isVoidTy()) {
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll
index 2b74fafc62719..48939351ac04c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -361,3 +361,83 @@ define void @call_returns_array_size0_struct() {
%call = call [1 x %size0type] @func.returns.array.size0.struct()
ret void
}
+
+; Test extends on return values.
+
+; This should notice that the return value from has_zext_return is zero
+; extended.
+declare zeroext i16 @has_zext_return()
+define i32 @test_zext_return_from_callee() {
+ ; CHECK-LABEL: name: test_zext_return_from_callee
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: BL @has_zext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %val = call i16 @has_zext_return()
+ %ext = zext i16 %val to i32
+ ret i32 %ext
+}
+
+; Same as above, but with zeroext explicitly on the call. Should produce the
+; same codegen.
+define i32 @test_zext_return_from_callee2() {
+ ; CHECK-LABEL: name: test_zext_return_from_callee2
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: BL @has_zext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %val = call zeroext i16 @has_zext_return()
+ %ext = zext i16 %val to i32
+ ret i32 %ext
+}
+
+; This should notice that the return value from has_sext_return is sign
+; extended.
+declare signext i16 @has_sext_return()
+define i32 @test_sext_return_from_callee() {
+ ; CHECK-LABEL: name: test_sext_return_from_callee
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: BL @has_sext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %val = call i16 @has_sext_return()
+ %ext = sext i16 %val to i32
+ ret i32 %ext
+}
+
+; Same as above, but with signext explicitly on the call. Should produce the
+; same codegen.
+define i32 @test_sext_return_from_callee2() {
+ ; CHECK-LABEL: name: test_sext_return_from_callee2
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: BL @has_sext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %val = call signext i16 @has_sext_return()
+ %ext = sext i16 %val to i32
+ ret i32 %ext
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index bcb82d07ccdee..4ded7eba7ab15 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -280,7 +280,8 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -341,7 +342,8 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -482,10 +484,10 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
- ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+ ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8)
+ ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call i8 @external_i8_zeroext_func_void()
@@ -544,10 +546,10 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
- ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+ ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8)
+ ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call i8 @external_i8_signext_func_void()
@@ -665,7 +667,8 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -726,7 +729,8 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
More information about the llvm-commits
mailing list