[llvm] d1ff003 - [SelectionDAGBuilder] Stop setting alignment to one for hidden sret values
Alex Richardson via llvm-commits
llvm-commits at lists.llvm.org
Mon May 4 06:44:57 PDT 2020
Author: Alex Richardson
Date: 2020-05-04T14:44:39+01:00
New Revision: d1ff003fbbb36891ca7752785dec86cfd1a76139
URL: https://github.com/llvm/llvm-project/commit/d1ff003fbbb36891ca7752785dec86cfd1a76139
DIFF: https://github.com/llvm/llvm-project/commit/d1ff003fbbb36891ca7752785dec86cfd1a76139.diff
LOG: [SelectionDAGBuilder] Stop setting alignment to one for hidden sret values
We allocated a suitably aligned frame index so we know that all the values
have ABI alignment.
For MIPS this avoids using a pair of lwl + lwr instructions instead of a
single lw. I found this when compiling CHERI pure capability code where
we can't use the lwl/lwr unaligned loads/stores and were falling
back to a byte load + shift + or sequence.
This should save a few instructions for MIPS and possibly other backends
that don't have fast unaligned loads/stores.
It also improves code generation for CodeGen/X86/pr34653.ll and
CodeGen/WebAssembly/offset.ll since they can now use aligned loads.
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D78999
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/test/CodeGen/Mips/implicit-sret.ll
llvm/test/CodeGen/WebAssembly/offset.ll
llvm/test/CodeGen/X86/pr34653.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 680b33e41dbe..944aeab5e1ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1833,6 +1833,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
+ Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
@@ -1841,9 +1842,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
- Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
+ Chains[i] = DAG.getStore(
+ Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
- Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
+ commonAlignment(BaseAlign, Offsets[i]));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -9271,6 +9274,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
@@ -9279,7 +9284,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
- /* Alignment = */ 1);
+ HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll
index 3cc0892ae809..e86cec37d510 100644
--- a/llvm/test/CodeGen/Mips/implicit-sret.ll
+++ b/llvm/test/CodeGen/Mips/implicit-sret.ll
@@ -16,23 +16,13 @@ define internal void @test() unnamed_addr nounwind {
; CHECK-NEXT: daddiu $4, $sp, 8
; CHECK-NEXT: jal implicit_sret_decl
; CHECK-NEXT: nop
-; CHECK-NEXT: # implicit-def: $at_64
-; CHECK-NEXT: ldl $1, 24($sp)
-; CHECK-NEXT: ldr $1, 31($sp)
+; CHECK-NEXT: ld $6, 24($sp)
+; CHECK-NEXT: ld $5, 16($sp)
+; CHECK-NEXT: ld $7, 32($sp)
+; CHECK-NEXT: lw $1, 8($sp)
; CHECK-NEXT: # implicit-def: $v0_64
-; CHECK-NEXT: ldl $2, 16($sp)
-; CHECK-NEXT: ldr $2, 23($sp)
-; CHECK-NEXT: # implicit-def: $v1_64
-; CHECK-NEXT: ldl $3, 32($sp)
-; CHECK-NEXT: ldr $3, 39($sp)
-; CHECK-NEXT: # implicit-def: $a1
-; CHECK-NEXT: lwl $5, 8($sp)
-; CHECK-NEXT: lwr $5, 11($sp)
-; CHECK-NEXT: # implicit-def: $a0_64
-; CHECK-NEXT: move $4, $5
-; CHECK-NEXT: move $5, $2
-; CHECK-NEXT: move $6, $1
-; CHECK-NEXT: move $7, $3
+; CHECK-NEXT: move $2, $1
+; CHECK-NEXT: move $4, $2
; CHECK-NEXT: jal use_sret
; CHECK-NEXT: nop
; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload
@@ -75,15 +65,9 @@ define internal void @test2() unnamed_addr nounwind {
; CHECK-NEXT: daddiu $4, $sp, 0
; CHECK-NEXT: jal implicit_sret_decl2
; CHECK-NEXT: nop
-; CHECK-NEXT: # implicit-def: $at
-; CHECK-NEXT: lwl $1, 20($sp)
-; CHECK-NEXT: lwr $1, 23($sp)
-; CHECK-NEXT: # implicit-def: $v0
-; CHECK-NEXT: lwl $2, 12($sp)
-; CHECK-NEXT: lwr $2, 15($sp)
-; CHECK-NEXT: # implicit-def: $v1
-; CHECK-NEXT: lwl $3, 4($sp)
-; CHECK-NEXT: lwr $3, 7($sp)
+; CHECK-NEXT: lw $1, 20($sp)
+; CHECK-NEXT: lw $2, 12($sp)
+; CHECK-NEXT: lw $3, 4($sp)
; CHECK-NEXT: # implicit-def: $a0_64
; CHECK-NEXT: move $4, $3
; CHECK-NEXT: # implicit-def: $a1_64
diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll
index d0283386afdd..8e89f2a02bb2 100644
--- a/llvm/test/CodeGen/WebAssembly/offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset.ll
@@ -645,9 +645,9 @@ define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q)
; CHECK-LABEL: aggregate_return:
; CHECK: i64.const $push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK: i64.store 8($0):p2align=2, $pop[[L0]]{{$}}
+; CHECK: i64.store 8($0), $pop[[L0]]{{$}}
; CHECK: i64.const $push[[L1:[0-9]+]]=, 0{{$}}
-; CHECK: i64.store 0($0):p2align=2, $pop[[L1]]{{$}}
+; CHECK: i64.store 0($0), $pop[[L1]]{{$}}
define {i32,i32,i32,i32} @aggregate_return() {
ret {i32,i32,i32,i32} zeroinitializer
}
diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll
index 9a0b56a90cb7..2f63ac311f2e 100644
--- a/llvm/test/CodeGen/X86/pr34653.ll
+++ b/llvm/test/CodeGen/X86/pr34653.ll
@@ -15,28 +15,22 @@ define void @pr34653() {
; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: callq test
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm1
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm2
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm3
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm4
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm5
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm6
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm7
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm8
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm9
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm10
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm11
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm12
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm13
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm14
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm15
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero
; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero
@@ -60,17 +54,11 @@ define void @pr34653() {
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
More information about the llvm-commits
mailing list