[LLVMdev] How to improve code generated for 'getelementptr' ?
Jeroen Dobbelaere
Jeroen.Dobbelaere at synopsys.com
Thu Feb 2 07:18:47 PST 2012
Hi all,
I am working on an llvm backend for a processor with a relatively simple instruction set.
For small loops, the code that is produced depends heavily on how the loop is specified:
The less information we provide to clang, the better the loop code becomes...
Any idea how I can teach llvm that we don't have load/store instructions with a register index,
so that it is more efficient to convert Init1 to incrementing a pointer instead of
recomputing the address of 'data[i]' every time?
The sample C-code looks like :
---
/* Stores 1 into data[0] .. data[99] using an index-based loop.
 * data must point to at least 100 writable ints. */
void Init1(int* data)
{
int i=0;
for (i=0; i<100; ++i) {
data[i]=1; /* indexed store: the element address depends on i */
}
}
/* Stores 4 into every int in the half-open range [p, e).
 * The loop only stops when p becomes equal to e, so e must be reachable
 * from p by whole-int increments. */
void Init2(int* p, int* e)
{
while (p!=e) {
*p++=4; /* store through the pointer, then advance it by one int */
}
}
---
This produces the following assembly code for -O3 (see below).
NOTE: a branch instruction has a delay slot
For Init1, the loop body consists of 8 instructions.
For Init2, it consists of 5 instructions which is already much better
(The optimal would use 4 instructions)
Question: how can we teach llvm to provide code like Init2, for input of Init1 ?
---
.text
;; Init1: compiler output for the indexed C loop (stores 1 to data[0..99]).
;; NOTE(review): register roles below are inferred from the C source and the
;; .ll in this post (R0 presumably holds 'data'); the ISA is not documented here.
.globl _Init1
_Init1: ;; @Init1
;; BB#0: ;; %entry
ldi R1 , 0 ;; R1 = i = 0
ldi R2 , 1 ;; R2 = value to store
ldi R3 , 100 ;; R3 = loop bound
_BB1_1: ;; %for.body
;; =>This Inner Loop Header: Depth=1
mov R4 , R1 ;; R4 = i (copied before the increment)
add R1 , 1 ;; ++i
mov R5 , R0 ;; R5 = base pointer, recopied every iteration
sll R4 , 2 ;; R4 = i * 4, the byte offset of data[i]
cmpne R1 , R3 ;; condition: i != 100
add R5 , R4 ;; R5 = &data[i]
bcc24 _BB1_1 ;; presumably branches back while the compare above holds
sw R2 , R5, 0 ;; branch delay slot: data[i] = 1
;; BB#2: ;; %for.end
b r15 ;; presumably returns through r15 -- confirm against the target ABI
nop ;; branch delay slot
;; Init2: compiler output for the pointer-increment C loop (stores 4 to [p, e)).
;; NOTE(review): register roles below are inferred from the C source and the
;; .ll in this post (R0 presumably holds 'p', R1 'e'); the ISA is not documented here.
.globl _Init2
_Init2: ;; @Init2
;; BB#0: ;; %entry
cmpeq R0 , R1 ;; condition: p == e (empty range)
bcc24 _BB2_3 ;; presumably skips the loop when the compare above holds
nop ;; branch delay slot
;; BB#1:
ldi R2 , 4 ;; R2 = value to store
_BB2_2: ;; %while.body
;; =>This Inner Loop Header: Depth=1
sw R2 , R0, 0 ;; *p = 4
add R0 , 4 ;; p advances by one 4-byte int
cmpne R1 , R0 ;; condition: e != p
bcc24 _BB2_2 ;; presumably branches back while the compare above holds
nop ;; branch delay slot, left unfilled
_BB2_3: ;; %while.end
b r15 ;; presumably returns through r15 -- confirm against the target ABI
nop ;; branch delay slot
---
For reference, the .ll file:
---
; ModuleID = 'loop_test.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f64:32:32-v64:32:32-v128:32:32-n32-s0:32:32-a0:0:32-S32"
target triple = "arch--"
; Init1: stores 1 to %data[0..99], addressing each element through an integer index.
define void @Init1(i32* nocapture %data) nounwind {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
; %i.01 is the induction variable: 0 on entry, %inc on the back edge.
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; The element address is recomputed from the base pointer and the index each iteration.
%arrayidx = getelementptr inbounds i32* %data, i32 %i.01
store i32 1, i32* %arrayidx, align 4, !tbaa !0
%inc = add nsw i32 %i.01, 1
; Leave the loop once the incremented index equals 100.
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; Init2: stores 4 to every i32 in [%p, %e), carrying the pointer itself through the loop.
define void @Init2(i32* %p, i32* %e) nounwind {
entry:
; Skip the loop entirely when the range is empty.
%cmp1 = icmp eq i32* %p, %e
br i1 %cmp1, label %while.end, label %while.body
while.body: ; preds = %entry, %while.body
; %p.addr.02 is the current pointer: %p on entry, %incdec.ptr on the back edge.
%p.addr.02 = phi i32* [ %incdec.ptr, %while.body ], [ %p, %entry ]
; Next pointer is the current one advanced by a single i32 element.
%incdec.ptr = getelementptr inbounds i32* %p.addr.02, i32 1
store i32 4, i32* %p.addr.02, align 4, !tbaa !0
; Leave the loop once the advanced pointer equals %e.
%cmp = icmp eq i32* %incdec.ptr, %e
br i1 %cmp, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
ret void
}
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
---
Greetings,
Jeroen Dobbelaere
More information about the llvm-dev
mailing list