[llvm-dev] RFC: alloca -- specify rounding factor for allocation (and more)
Nat! via llvm-dev
llvm-dev at lists.llvm.org
Fri Aug 28 02:43:11 PDT 2015
Hi
Sort of piggybacking on the other thread: I am looking for some feedback
on how to implement the following idea in LLVM.
The really short version of the idea is this:
* I want to alloca a field (record/struct) so that its size is rounded up
to a whole multiple of 64 bytes. [^1]
* This alloca'ed field will be used exclusively as an argument to functions
* llvm should be aware of the extra bytes and should be able to use them
in subsequent arguments to function calls (e.g. tail calls)
... Why do I need this ?
http://www.mulle-kybernetik.com/weblog/2015/mulle_objc_meta_call_convention.html
AFAIK AllocaInst can specify address alignment but not size alignment. I
wonder whether it would be an "OK" addition to LLVM if one could also
specify a size rounding?
Then I would need a way to signal to LLVM that this is a special field,
so that it may reuse all of the space. Would I mark the passed-in struct
with a new `__attribute__` or some such?
Finally, I would need an optimization pass (?) that checks that the
alloca is big enough to hold the values and that those values are no
longer needed afterwards, and then reuses the alloca.
It would be good to hear from people better versed in LLVM (pretty
much anyone on this list :) whether this is basically the right approach
and how much of it LLVM can maybe already do.
Ciao
Nat!
P.S. Here is some code, that shows what is technically desired:
# This is a shell archive. Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file". Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
# Makefile
# types.h
# a_b.c
# a_b_c.c
# main.c
#
# Unpack the Makefile. sed strips the leading 'X' guard from each payload line.
#
# Every rule carries its recipe inline ("target: prerequisites ; command")
# instead of on a following indented line. A recipe on its own line must start
# with a TAB; the space-indented form is rejected by make with
# "missing separator". The inline form needs no TAB, so it also survives
# mailers and archives that collapse whitespace.
echo x - Makefile
sed 's/^X//' >Makefile << 'END-of-Makefile'
XCFLAGS=-g -O3 -DNDEBUG
X
X
Xall: reuse noreuse
X
Xa_b1.o: a_b.c ; $(CC) $(CFLAGS) -c -o $@ -DREUSE=0 $+
X
Xa_b2.o: a_b.c ; $(CC) $(CFLAGS) -c -o $@ -DREUSE=1 $+
X
Xnoreuse: a_b1.o a_b_c.o main.o ; $(CC) -o $@ $(CFLAGS) $+
X
Xreuse: a_b2.o a_b_c.o main.o ; $(CC) -o $@ $(CFLAGS) $+
X
Xclean: ; rm *.o noreuse reuse
X
X
Xreuse.shar: Makefile *.h *.c ; shar $+ > $@
X
END-of-Makefile
# Unpack types.h. sed strips the leading 'X' guard from each payload line.
# The header declares the two parameter structs and, for each of them, a union
# whose 'space' member is sized to the struct size rounded up to a multiple
# of 64 bytes.
# NOTE(review): the payload line "/ 64)];" has no 'X' guard (it looks like a
# mail line-wrap). sed passes it through unchanged, so in the extracted header
# it is simply a continuation of the preceding declaration.
echo x - types.h
sed 's/^X//' >types.h << 'END-of-types.h'
Xstruct param_a_b
X{
X int a;
X int b;
X};
X
X
Xstruct param_a_b_c
X{
X struct param_a_b a_b;
X int c;
X};
X
X
Xunion alloc_param_a_b
X{
X struct param_a_b param;
X unsigned char space[ 64 * ((sizeof( struct param_a_b) + 63) / 64)];
X};
X
X
Xunion alloc_param_a_b_c
X{
X struct param_a_b_c param;
X unsigned char space[ 64 * ((sizeof( struct param_a_b_c) + 63)
/ 64)];
X};
END-of-types.h
# Unpack a_b.c. sed strips the leading 'X' guard from each payload line.
# The file defines f() in two variants selected by the REUSE macro:
#   REUSE != 0  writes param.c into the caller's buffer and passes that same
#               buffer on to g();
#   REUSE == 0  copies a and b into a local union, sets c and passes the
#               local to g().
# The size precondition of the REUSE variant is a compile-time
# _Static_assert rather than a runtime assert(): the Makefile in this archive
# builds with -DNDEBUG, which compiles assert() out, and both operands are
# compile-time constants anyway.
echo x - a_b.c
sed 's/^X//' >a_b.c << 'END-of-a_b.c'
X#include "types.h"
X#include <assert.h>
X
X
Xextern int g( union alloc_param_a_b_c *space);
X
X
X#if REUSE
X
Xint f( union alloc_param_a_b *space)
X{
X /* reusing the caller's buffer is only valid if both unions have the same size */
X _Static_assert( sizeof( union alloc_param_a_b) == sizeof( union alloc_param_a_b_c),
X "alloc_param_a_b and alloc_param_a_b_c must have the same size");
X
X ((union alloc_param_a_b_c *) space)->param.c = 1848;
X return( g( (union alloc_param_a_b_c *) space));
X}
X
X#else
X
Xint f( union alloc_param_a_b *space)
X{
X union alloc_param_a_b_c x;
X
X x.param.a_b.a = space->param.a;
X x.param.a_b.b = space->param.b;
X x.param.c = 1848;
X
X return( g( &x));
X}
X
X#endif
END-of-a_b.c
# Unpack a_b_c.c. sed strips the leading 'X' guard from each payload line.
# The file defines g(), which returns the sum of the a, b and c fields of the
# parameter block it is given.
echo x - a_b_c.c
sed 's/^X//' >a_b_c.c << 'END-of-a_b_c.c'
X#include "types.h"
X
X
Xint g( union alloc_param_a_b_c *p)
X{
X return( p->param.a_b.a + p->param.a_b.b + p->param.c);
X}
END-of-a_b_c.c
# Unpack main.c. sed strips the leading 'X' guard from each payload line.
# main() places a = 18 and b = 48 in a stack-allocated union alloc_param_a_b
# and returns whatever f() returns for it.
echo x - main.c
sed 's/^X//' >main.c << 'END-of-main.c'
X#include "types.h"
X
X
Xint f( union alloc_param_a_b *space);
X
X
Xint main()
X{
X union alloc_param_a_b args;
X
X args.param.a = 18;
X args.param.b = 48;
X return( f( &args));
X}
END-of-main.c
# End of the archive: stop the shell here so that any text following the
# archive in the same file is not interpreted as commands.
exit
The potential gains are obvious:
otool -t -v a_b1.o
a_b1.o:
(__TEXT,__text) section
_f:
0000000000000000 pushq %rbp
0000000000000001 movq %rsp, %rbp
0000000000000004 pushq %rbx
0000000000000005 subq $0x48, %rsp
0000000000000009 movq (%rip), %rbx
0000000000000010 movq (%rbx), %rbx
0000000000000013 movq %rbx, -0x10(%rbp)
0000000000000017 movl (%rdi), %eax
0000000000000019 movl %eax, -0x50(%rbp)
000000000000001c movl 0x4(%rdi), %eax
000000000000001f movl %eax, -0x4c(%rbp)
0000000000000022 movl $0x738, -0x48(%rbp) ## imm = 0x738
0000000000000029 leaq -0x50(%rbp), %rdi
000000000000002d callq 0x32
0000000000000032 cmpq -0x10(%rbp), %rbx
0000000000000036 jne 0x3f
0000000000000038 addq $0x48, %rsp
000000000000003c popq %rbx
000000000000003d popq %rbp
000000000000003e retq
000000000000003f callq 0x44
otool -t -v a_b2.o
a_b2.o:
(__TEXT,__text) section
_f:
0000000000000000 pushq %rbp
0000000000000001 movq %rsp, %rbp
0000000000000004 movl $0x738, 0x8(%rdi) ## imm = 0x738
000000000000000b popq %rbp
000000000000000c jmp 0x11
[^1]
A workaround on the clang side would be to wrap the struct in a union (see
the example code).
More information about the llvm-dev
mailing list