[llvm] b9a0384 - GlobalISel: Preserve source value information for outgoing byval args
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 18 06:17:03 PDT 2021
Author: Matt Arsenault
Date: 2021-03-18T09:16:54-04:00
New Revision: b9a03849836f6409291025a31089bfabfa96dd0b
URL: https://github.com/llvm/llvm-project/commit/b9a03849836f6409291025a31089bfabfa96dd0b
DIFF: https://github.com/llvm/llvm-project/commit/b9a03849836f6409291025a31089bfabfa96dd0b.diff
LOG: GlobalISel: Preserve source value information for outgoing byval args
Pass through the original argument IR value in order to preserve the
aliasing information in the memcpy memory operands.
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index f63033cf6136..868980d24fc2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -23,6 +23,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cstdint>
@@ -38,7 +39,6 @@ class MachineIRBuilder;
struct MachinePointerInfo;
class MachineRegisterInfo;
class TargetLowering;
-class Value;
class CallLowering {
const TargetLowering *TLI;
@@ -65,10 +65,17 @@ class CallLowering {
// if the argument was an incoming arg.
SmallVector<Register, 2> OrigRegs;
+ /// Optionally track the original IR value for the argument. This may not be
+ /// meaningful in all contexts. This should only be used for forwarding
+ /// through to use for aliasing information in MachinePointerInfo for memory
+ /// arguments.
+ const Value *OrigValue = nullptr;
+
ArgInfo(ArrayRef<Register> Regs, Type *Ty,
ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
- bool IsFixed = true)
- : BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()) {
+ bool IsFixed = true, const Value *OrigValue = nullptr)
+ : BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()),
+ OrigValue(OrigValue) {
if (!Regs.empty() && Flags.empty())
this->Flags.push_back(ISD::ArgFlagsTy());
// FIXME: We should have just one way of saying "no register".
@@ -77,6 +84,11 @@ class CallLowering {
"only void types should have no register");
}
+ ArgInfo(ArrayRef<Register> Regs, const Value &OrigValue,
+ ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
+ bool IsFixed = true)
+ : ArgInfo(Regs, OrigValue.getType(), Flags, IsFixed, &OrigValue) {}
+
ArgInfo() : BaseArgInfo() {}
};
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 601d087e0453..808be0ff6381 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -112,7 +112,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
for (auto &Arg : CB.args()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
+ ArgInfo OrigArg{ArgRegs[i], *Arg.get(), getAttributesForArgIdx(CB, i),
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
@@ -204,7 +204,8 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags[0], OrigArg.IsFixed);
+ OrigArg.Flags[0], OrigArg.IsFixed,
+ OrigArg.OrigValue);
return;
}
@@ -667,18 +668,19 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
Register StackAddr =
Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);
- const LLT PtrTy = MRI.getType(StackAddr);
-
- // FIXME: We do not have access to the original IR value here to
- // preserve the aliasing information.
- MachinePointerInfo SrcMPO(PtrTy.getAddressSpace());
+ MachinePointerInfo SrcMPO(Args[i].OrigValue);
+ if (!Args[i].OrigValue) {
+ // We still need to accurately track the stack address space if we
+ // don't know the underlying value.
+ const LLT PtrTy = MRI.getType(StackAddr);
+ SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
+ }
Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
inferAlignFromPtrInfo(MF, DstMPO));
- // TODO: Theoretically the source value could have a higher alignment,
- // but we don't have that here
- Align SrcAlign = Flags.getNonZeroByValAlign();
+ Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
+ inferAlignFromPtrInfo(MF, SrcMPO));
Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
DstMPO, DstAlign, SrcMPO, SrcAlign,
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 5d062820a49f..067018ba2cff 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -552,6 +552,11 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
MPO.Offset);
}
+ if (const Value *V = MPO.V.dyn_cast<const Value *>()) {
+ const Module *M = MF.getFunction().getParent();
+ return V->getPointerAlignment(M->getDataLayout());
+ }
+
return Align(1);
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index b97e63f51d1e..ef0d4c6ee93c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -462,7 +462,7 @@ bool AArch64CallLowering::lowerFormalArguments(
if (DL.getTypeStoreSize(Arg.getType()).isZero())
continue;
- ArgInfo OrigArg{VRegs[i], Arg.getType()};
+ ArgInfo OrigArg{VRegs[i], Arg};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index c7c4ed45589f..a942a740535a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -656,7 +656,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
}
}
- ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+ ArgInfo OrigArg(VRegs[Idx], Arg);
const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
setArgFlags(OrigArg, OrigArgIdx, DL, F);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index bf632f035572..c0807b83f841 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -3916,7 +3916,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8, align 4, addrspace 5)
+ ; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8 from %ir.val, align 4, addrspace 5)
; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -3971,11 +3971,11 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12, align 4, addrspace 5)
+ ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12 from %ir.incoming0, align 4, addrspace 5)
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1, align 32, addrspace 5)
+ ; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1 from %ir.incoming1, align 32, addrspace 5)
; CHECK: $vgpr0 = COPY [[C]](s32)
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -3995,6 +3995,57 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
ret void
}
+declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %arg0) #0
+
+; Make sure we are aware of the higher alignment of the incoming value
+; than implied by the outgoing byval alignment in the memory operand.
+define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 {
+ ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; CHECK: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
+ ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store 32 into stack, align 4, addrspace 5), (dereferenceable load 32 from %ir.incoming_high_align, align 256, addrspace 5)
+ ; CHECK: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+ ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+ ; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; CHECK: $sgpr12 = COPY [[COPY14]](s32)
+ ; CHECK: $sgpr13 = COPY [[COPY15]](s32)
+ ; CHECK: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK: ADJCALLSTACKDOWN 0, 32, implicit-def $scc
+ ; CHECK: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
+ ; CHECK: S_SETPC_B64_return [[COPY20]]
+ call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align)
+ ret void
+}
+
define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i8
; CHECK: bb.1 (%ir-block.0):
More information about the llvm-commits mailing list