[llvm] [GISel][RISCV]Implement indirect parameter passing for large scalars (PR #95429)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 23:51:56 PDT 2024
================
@@ -16,6 +16,559 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs_stack_fst
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[LOAD]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %y to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs_stack_fst( ) {
+ ; ILP32-LABEL: name: caller_128i_in_regs_stack_fst
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
----------------
spaits wrote:
I think this is correct now. Here is an explanation.
`G_PTR_ADD` is used to compute the address of the right slot on the stack. Its offset is zero only when the first stack argument is processed. For example, in the `caller_128i_in_regs_stack` tests, `G_PTR_ADD` is used with a non-zero offset multiple times. You get the address of the third 32-bit stack argument (assuming all the arguments before it are also 32-bit) by taking the stack pointer and adding 4x2=8 bytes to it.
In short, the constant is the argument's offset relative to the stack pointer, and it is non-zero whenever other arguments have already been placed on the stack before it.
Here is an example I quickly made:
```llvm
define i64 @fun(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i128 %y ) {
%2 = trunc i128 %y to i64
ret i64 %2
}
define i32 @fun_caller( ) {
%1 = call i64 @fun(i64 1,i64 1, i64 1, i64 1, i64 1, i128 2)
%2 = trunc i64 %1 to i32
ret i32 %2
}
```
Now concentrate on the caller.
The full GMIR for the caller.
```llvm
Frame Objects:
fi#0: size=128, align=8, at location [SP]
bb.1 (%ir-block.0):
%1:_(s64) = G_CONSTANT i64 1
%2:_(s128) = G_CONSTANT i128 2
ADJCALLSTACKDOWN 12, 0, implicit-def $x2, implicit $x2
%3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %1:_(s64)
%5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %1:_(s64)
%7:_(s32), %8:_(s32) = G_UNMERGE_VALUES %1:_(s64)
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %1:_(s64)
%11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %1:_(s64)
%13:_(p0) = COPY $x2
%14:_(s32) = G_CONSTANT i32 0
%15:_(p0) = G_PTR_ADD %13:_, %14:_(s32)
G_STORE %11:_(s32), %15:_(p0) :: (store (s32) into stack, align 16)
%16:_(s32) = G_CONSTANT i32 4
%17:_(p0) = G_PTR_ADD %13:_, %16:_(s32)
G_STORE %12:_(s32), %17:_(p0) :: (store (s32) into stack + 4)
%18:_(p0) = G_FRAME_INDEX %stack.0
G_STORE %2:_(s128), %18:_(p0) :: (store (s128), align 8)
%19:_(s32) = G_CONSTANT i32 8
%20:_(p0) = G_PTR_ADD %13:_, %19:_(s32)
G_STORE %18:_(p0), %20:_(p0) :: (store (p0), align 8)
$x10 = COPY %3:_(s32)
$x11 = COPY %4:_(s32)
$x12 = COPY %5:_(s32)
$x13 = COPY %6:_(s32)
$x14 = COPY %7:_(s32)
$x15 = COPY %8:_(s32)
$x16 = COPY %9:_(s32)
$x17 = COPY %10:_(s32)
PseudoCALL target-flags(riscv-call) @fun, <regmask $vlenb $x0 $x1 $x8 $x9 $x18 $x19 $x20 $x21 $x22 $x23 $x24 $x25 $x26 $x27 $x8_x9 $x18_x19 $x20_x21 $x22_x23 $x24_x25 $x26_x27>, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
ADJCALLSTACKUP 12, 0, implicit-def $x2, implicit $x2
%21:_(s32) = COPY $x10
%22:_(s32) = COPY $x11
%0:_(s64) = G_MERGE_VALUES %21:_(s32), %22:_(s32)
%23:_(s32) = G_TRUNC %0:_(s64)
$x10 = COPY %23:_(s32)
PseudoRET implicit $x10
# End machine code for function fun_caller.
```
Relevant parts:
```llvm
Frame Objects:
fi#0: size=128, align=8, at location [SP]
bb.1 (%ir-block.0):
.
.
.
%2:_(s128) = G_CONSTANT i128 2
ADJCALLSTACKDOWN 12, 0, implicit-def $x2, implicit $x2
%13:_(p0) = COPY $x2 ; Get the stack pointer.
.
.
.
%18:_(p0) = G_FRAME_INDEX %stack.0
G_STORE %2:_(s128), %18:_(p0) :: (store (s128), align 8)
%19:_(s32) = G_CONSTANT i32 8 ; Get the offset of the right slot on the stack (in bytes).
; This is the right offset because, before this, a 64-bit value was passed as two 32-bit halves,
; stored on the stack at offsets 0 and 4. This is what the RISC-V calling convention requires.
; So the next free slot is at an offset of 2 * 32 bits = 64 bits = 8 bytes.
%20:_(p0) = G_PTR_ADD %13:_, %19:_(s32) ; Go to the right place in stack with the help of the offset.
G_STORE %18:_(p0), %20:_(p0) :: (store (p0), align 8) ; Store the pointer in that place in the stack for the call.
.
.
.
PseudoCALL target-flags(riscv-call) @fun, <regmask $vlenb $x0 $x1 $x8 $x9 $x18 $x19 $x20 $x21 $x22 $x23 $x24 $x25 $x26 $x27 $x8_x9 $x18_x19 $x20_x21 $x22_x23 $x24_x25 $x26_x27>, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11 ; The call happens
.
.
.
```
What do you think?
https://github.com/llvm/llvm-project/pull/95429
More information about the llvm-commits
mailing list