[PATCH] D95543: [GVN] Clobber partially aliased loads.
Nikita Popov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 23 10:02:46 PDT 2021
nikic accepted this revision.
nikic added a comment.
This revision is now accepted and ready to land.
LGTM, you might want to pick up the additional test cases.
================
Comment at: llvm/lib/Transforms/Scalar/GVN.cpp:1009
+ const auto ClobberOff = MD->getClobberOffset();
+ // TODO: GVN has no deal with a negative offset.
+ Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
----------------
I don't think we can really do anything with negative offsets, unless we want to do something like shorten the second load and combine it with the value of the first load. That's unlikely to be profitable.
================
Comment at: llvm/test/Transforms/GVN/PRE/rle.ll:736
+; Cross block partial alias case with phi translation.
+; FIXME: Currently we fail to clobber both partially aliased cases of load.
+define i32 @load_load_partial_alias_cross_block_phi_trans(i8* %P) nounwind ssp {
----------------
nikic wrote:
> I believe for phi translation to work you need to have the translated geps in the predecessor. This test case worked for me:
> ```
> define i32 @load_load_partial_alias_cross_block_phi_trans(i8* %P) nounwind {
> ; LE-LABEL: @load_load_partial_alias_cross_block_phi_trans(
> ; LE-NEXT: entry:
> ; LE-NEXT: [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
> ; LE-NEXT: [[X1:%.*]] = load i32, i32* [[XX]], align 4
> ; LE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
> ; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16
> ; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
> ; LE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 8
> ; LE-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
> ; LE-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
> ; LE: if:
> ; LE-NEXT: br label [[JOIN:%.*]]
> ; LE: else:
> ; LE-NEXT: br label [[JOIN]]
> ; LE: join:
> ; LE-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
> ; LE-NEXT: [[CONV6:%.*]] = zext i8 [[TMP5]] to i32
> ; LE-NEXT: ret i32 [[CONV6]]
> ; LE: if.end:
> ; LE-NEXT: ret i32 52
> ;
> ; BE-LABEL: @load_load_partial_alias_cross_block2(
> ; BE-NEXT: entry:
> ; BE-NEXT: [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
> ; BE-NEXT: [[X1:%.*]] = load i32, i32* [[XX]], align 4
> ; BE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
> ; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8
> ; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
> ; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 16
> ; BE-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
> ; BE-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
> ; BE: if:
> ; BE-NEXT: br label [[JOIN:%.*]]
> ; BE: else:
> ; BE-NEXT: br label [[JOIN]]
> ; BE: join:
> ; BE-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
> ; BE-NEXT: [[CONV6:%.*]] = zext i8 [[TMP5]] to i32
> ; BE-NEXT: ret i32 [[CONV6]]
> ; BE: if.end:
> ; BE-NEXT: ret i32 52
> ;
> entry:
> %xx = bitcast i8* %P to i32*
> %x1 = load i32, i32* %xx, align 4
> %cmp = icmp eq i32 %x1, 127
> br i1 %cmp, label %if, label %else
>
> if:
> %arrayidx.if = getelementptr inbounds i8, i8* %P, i64 1
> br label %join
>
> else:
> %arrayidx.else = getelementptr inbounds i8, i8* %P, i64 2
> br label %join
>
> join:
> %idx = phi i64 [ 1, %if ], [ 2, %else ]
> %arrayidx4 = getelementptr inbounds i8, i8* %P, i64 %idx
> %tmp5 = load i8, i8* %arrayidx4, align 1
> %conv6 = zext i8 %tmp5 to i32
> ret i32 %conv6
>
> if.end:
> ret i32 52
> }
> ```
>
> I think the actually problematic case probably has something to do with phi translation in loops (where you translate into the same block, but in a different iteration). I'll try to play with that tomorrow, but if I don't find anything let's just land this and see if any miscompiles turn up :)
I constructed a case that involves a loop, but everything seems to work fine in that case as well:
```
define void @load_load_partial_alias_loop(i8* %P) {
; LE-LABEL: @load_load_partial_alias_loop(
; LE-NEXT: entry:
; LE-NEXT: [[P_1:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 1
; LE-NEXT: [[V_1:%.*]] = load i8, i8* [[P_1]], align 1
; LE-NEXT: call void @use.i8(i8 [[V_1]])
; LE-NEXT: [[P_1_32:%.*]] = bitcast i8* [[P_1]] to i32*
; LE-NEXT: [[V_1_32:%.*]] = load i32, i32* [[P_1_32]], align 4
; LE-NEXT: call void @use.i32(i32 [[V_1_32]])
; LE-NEXT: [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8
; LE-NEXT: br label [[LOOP:%.*]]
; LE: loop:
; LE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
; LE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
; LE-NEXT: [[P_I:%.*]] = getelementptr i8, i8* [[P]], i64 [[I]]
; LE-NEXT: call void @use.i8(i8 [[V_I]])
; LE-NEXT: [[P_I_32:%.*]] = bitcast i8* [[P_I]] to i32*
; LE-NEXT: [[V_I_32:%.*]] = load i32, i32* [[P_I_32]], align 4
; LE-NEXT: call void @use.i32(i32 [[V_I_32]])
; LE-NEXT: [[I_INC]] = add i64 [[I]], 1
; LE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8
; LE-NEXT: [[TMP2]] = trunc i32 [[TMP1]] to i8
; LE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
; LE: loop.loop_crit_edge:
; LE-NEXT: br label [[LOOP]]
; LE: exit:
; LE-NEXT: ret void
;
; BE-LABEL: @load_load_partial_alias_loop(
; BE-NEXT: entry:
; BE-NEXT: [[P_1:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 1
; BE-NEXT: [[V_1:%.*]] = load i8, i8* [[P_1]], align 1
; BE-NEXT: call void @use.i8(i8 [[V_1]])
; BE-NEXT: [[P_1_32:%.*]] = bitcast i8* [[P_1]] to i32*
; BE-NEXT: [[V_1_32:%.*]] = load i32, i32* [[P_1_32]], align 4
; BE-NEXT: call void @use.i32(i32 [[V_1_32]])
; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24
; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
; BE-NEXT: br label [[LOOP:%.*]]
; BE: loop:
; BE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
; BE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
; BE-NEXT: [[P_I:%.*]] = getelementptr i8, i8* [[P]], i64 [[I]]
; BE-NEXT: call void @use.i8(i8 [[V_I]])
; BE-NEXT: [[P_I_32:%.*]] = bitcast i8* [[P_I]] to i32*
; BE-NEXT: [[V_I_32:%.*]] = load i32, i32* [[P_I_32]], align 4
; BE-NEXT: call void @use.i32(i32 [[V_I_32]])
; BE-NEXT: [[I_INC]] = add i64 [[I]], 1
; BE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16
; BE-NEXT: [[TMP3]] = trunc i32 [[TMP2]] to i8
; BE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
; BE: loop.loop_crit_edge:
; BE-NEXT: br label [[LOOP]]
; BE: exit:
; BE-NEXT: ret void
;
entry:
%P.1 = getelementptr i8, i8* %P, i64 1
%v.1 = load i8, i8* %P.1
call void @use.i8(i8 %v.1)
%P.1.32 = bitcast i8* %P.1 to i32*
%v.1.32 = load i32, i32* %P.1.32
call void @use.i32(i32 %v.1.32)
br label %loop
loop:
%i = phi i64 [ 1, %entry ], [ %i.inc, %loop ]
%P.i = getelementptr i8, i8* %P, i64 %i
%v.i = load i8, i8* %P.i
call void @use.i8(i8 %v.i)
%P.i.32 = bitcast i8* %P.i to i32*
%v.i.32 = load i32, i32* %P.i.32
call void @use.i32(i32 %v.i.32)
%i.inc = add i64 %i, 1
%cmp = icmp ne i64 %i.inc, 64
br i1 %cmp, label %loop, label %exit
exit:
ret void
}
declare void @use.i8(i8) readnone
declare void @use.i32(i32) readnone
```
Now, I have some doubts that these transforms are //profitable//, but at least they're not wrong.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D95543/new/
https://reviews.llvm.org/D95543
More information about the llvm-commits
mailing list