[llvm-bugs] [Bug 46195] New: DAGCombiner can incorrectly order promoted loads
via llvm-bugs
llvm-bugs at lists.llvm.org
Thu Jun 4 03:10:46 PDT 2020
https://bugs.llvm.org/show_bug.cgi?id=46195
Bug ID: 46195
Summary: DAGCombiner can incorrectly order promoted loads
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: Common Code Generator Code
Assignee: unassignedbugs at nondot.org
Reporter: ricky at rzhou.org
CC: llvm-bugs at lists.llvm.org
The following program, manually reduced from the bug report at
https://sqlite.org/forum/forumpost/e7e828bb6f:
```
struct S {
int x;
unsigned short y;
};
extern void maybe_mutate(struct S *ptr);
void translate(struct S *ptr) {
unsigned short c = ptr->y;
maybe_mutate(ptr);
ptr->y = 2 | 0x200 | (c & (0x3f | 0x8000));
}
```
incorrectly produces the following when compiled under -O1:
```
translate(S*): # @translate(S*)
push rbx
mov rbx, rdi
call maybe_mutate(S*)
mov eax, -32707
and eax, dword ptr [rbx + 4]
or eax, 514
mov word ptr [rbx + 4], ax
pop rbx
ret
```
(https://godbolt.org/z/8kQqS7)
Even though `maybe_mutate` can change ptr->y, the load of `c` is moved after
the call to `maybe_mutate`.
Summary of debugging steps:
Building the repro with:
```
clang -O1 -c repro.c -S -o repro.S -c -fno-discard-value-names -mllvm
-print-after-all
```
shows that the problem is introduced somewhere inside the "X86 DAG->DAG
Instruction Selection" phase:
```
...
*** IR Dump After Module Verifier ***
; Function Attrs: nounwind uwtable
define dso_local void @translate(%struct.S* %ptr) local_unnamed_addr #0 {
entry:
%y = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 1
%0 = load i16, i16* %y, align 4, !tbaa !2
^^^ correct
call void @maybe_mutate(%struct.S* %ptr) #2
%1 = and i16 %0, -32707
%2 = or i16 %1, 514
store i16 %2, i16* %y, align 4, !tbaa !2
ret void
}
# *** IR Dump After X86 DAG->DAG Instruction Selection ***:
# Machine code for function translate: IsSSA, TracksLiveness
Function Live Ins: $rdi in %0
bb.0.entry:
liveins: $rdi
%0:gr64 = COPY $rdi
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead
$eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
$rdi = COPY %0:gr64
CALL64pcrel32 @maybe_mutate, <regmask $bh $bl $bp $bph $bpl $bx $ebp $ebx
$hbp $hbx $rbp $rbx $r12 $r13 $r14 $r15 $r12b $r13b $r14b $r15b $r12bh $r13bh
$r14bh $r15bh $r12d $r13d $r14d $r15d $r12w $r13w $r14w $r15w $r12wh and 3
more...>, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp,
implicit-def $ssp
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags,
implicit-def dead $ssp, implicit $rsp, implicit $ssp
%1:gr32 = MOV32ri -32707
%2:gr32 = AND32rm %1:gr32(tied-def 0), %0:gr64, 1, $noreg, 4, $noreg,
implicit-def dead $eflags :: (load 2 from %ir.y, align 4, !tbaa !2)
^^^ incorrect
%3:gr32 = ADD32ri_DB %2:gr32(tied-def 0), 514, implicit-def dead $eflags
%4:gr16 = COPY %3.sub_16bit:gr32
MOV16mr %0:gr64, 1, $noreg, 4, $noreg, killed %4:gr16 :: (store 2 into %ir.y,
align 4, !tbaa !2)
RET 0
# End machine code for function translate.
...
```
Building with:
```
clang -O1 -c repro.c -S -o repro.S -c -fno-discard-value-names -mllvm
-debug-only=isel
```
shows the issue being introduced somewhere here:
```
Legalized selection DAG: %bb.0 'translate:entry'
SelectionDAG has 22 nodes:
t0: ch = EntryToken
t2: i64,ch = CopyFromReg t0, Register:i64 %0
t4: i64 = add nuw t2, Constant:i64<4>
t7: i16,ch = load<(load 2 from %ir.y, align 4, !tbaa !2)> t0, t4, undef:i64
^^^ incorrect
t10: ch,glue = callseq_start t7:1, TargetConstant:i64<0>,
TargetConstant:i64<0>
t12: ch,glue = CopyToReg t10, Register:i64 $rdi, t2
t15: ch,glue = X86ISD::CALL t12, TargetGlobalAddress:i64<void (%struct.S*)*
@maybe_mutate> 0, Register:i64 $rdi, RegisterMask:Untyped, t12:1
t16: ch,glue = callseq_end t15, TargetConstant:i64<0>,
TargetConstant:i64<0>, t15:1
t18: i16 = and t7, Constant:i16<-32707>
t20: i16 = or t18, Constant:i16<514>
t21: ch = store<(store 2 into %ir.y, align 4, !tbaa !2)> t16, t20, t4,
undef:i64
t23: ch = X86ISD::RET_FLAG t21, TargetConstant:i32<0>
Optimized legalized selection DAG: %bb.0 'translate:entry'
SelectionDAG has 23 nodes:
t0: ch = EntryToken
t2: i64,ch = CopyFromReg t0, Register:i64 %0
t4: i64 = add nuw t2, Constant:i64<4>
t10: ch,glue = callseq_start t0, TargetConstant:i64<0>,
TargetConstant:i64<0>
t12: ch,glue = CopyToReg t10, Register:i64 $rdi, t2
t15: ch,glue = X86ISD::CALL t12, TargetGlobalAddress:i64<void (%struct.S*)*
@maybe_mutate> 0, Register:i64 $rdi, RegisterMask:Untyped, t12:1
t16: ch,glue = callseq_end t15, TargetConstant:i64<0>,
TargetConstant:i64<0>, t15:1
t28: i32,ch = load<(load 2 from %ir.y, align 4, !tbaa !2), anyext
from i16> t0, t4, undef:i64
^^^ incorrect
t30: i32 = and t28, Constant:i32<-32707>
t26: i32 = or t30, Constant:i32<514>
t27: i16 = truncate t26
t21: ch = store<(store 2 into %ir.y, align 4, !tbaa !2)> t16, t27, t4,
undef:i64
t23: ch = X86ISD::RET_FLAG t21, TargetConstant:i32<0>
```
Which implies that the bug is introduced somewhere inside of:
https://github.com/llvm/llvm-project/blob/b28167928d2722e8774da3fce8b3307a0fa27245/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp#L818
Adding some logging to dump the DAG in DAGCombiner, I believe I've narrowed it
down to this code:
https://github.com/llvm/llvm-project/blob/b28167928d2722e8774da3fce8b3307a0fa27245/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L1144-L1146
Here, we're promoting a load from of an i16 to a load of an i32. When we create
the new load node, we set its chain to that of the original load. This ensures
that the new load happens after anything that the original load happens after.
However, it does not ensure that things which happen after the original load
also happen after the new load - in particular, the callseq_start for the call
to `maybe_mutate`.
Replacing:
```
return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
```
with:
```
SDValue NewLoad = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
DAG.makeEquivalentMemoryOrdering(LD, NewLoad);
return NewLoad;
```
fixes the issue for me. If the above sounds reasonable to folks, I'd be happy
to whip up a patch/test for this.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200604/9e1a4136/attachment-0001.html>
More information about the llvm-bugs
mailing list