[llvm-bugs] [Bug 46195] New: DAGCombiner can incorrectly order promoted loads

via llvm-bugs llvm-bugs at lists.llvm.org
Thu Jun 4 03:10:46 PDT 2020


https://bugs.llvm.org/show_bug.cgi?id=46195

            Bug ID: 46195
           Summary: DAGCombiner can incorrectly order promoted loads
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Common Code Generator Code
          Assignee: unassignedbugs at nondot.org
          Reporter: ricky at rzhou.org
                CC: llvm-bugs at lists.llvm.org

The following program, manually reduced from the bug report at
https://sqlite.org/forum/forumpost/e7e828bb6f:

```
struct S {
  int x;
  unsigned short y;
};

extern void maybe_mutate(struct S *ptr);

void translate(struct S *ptr) {
  unsigned short c = ptr->y;
  maybe_mutate(ptr);
  ptr->y = 2 | 0x200 | (c & (0x3f | 0x8000));
}
```

incorrectly produces the following when compiled under -O1:

```
translate(S*):                        # @translate(S*)
        push    rbx
        mov     rbx, rdi
        call    maybe_mutate(S*)
        mov     eax, -32707
        and     eax, dword ptr [rbx + 4]
        or      eax, 514
        mov     word ptr [rbx + 4], ax
        pop     rbx
        ret
```

(https://godbolt.org/z/8kQqS7)

Even though `maybe_mutate` can change ptr->y, the load of `c` is moved after
the call to `maybe_mutate`.

Summary of debugging steps:

Building the repro with:

```
clang -O1 -c repro.c -S -o repro.S -c -fno-discard-value-names -mllvm
-print-after-all
```

shows that the problem is introduced somewhere inside the "X86 DAG->DAG
Instruction Selection" phase:

```
...
*** IR Dump After Module Verifier ***
; Function Attrs: nounwind uwtable
define dso_local void @translate(%struct.S* %ptr) local_unnamed_addr #0 {
entry:
  %y = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 1
  %0 = load i16, i16* %y, align 4, !tbaa !2
  ^^^ correct
  call void @maybe_mutate(%struct.S* %ptr) #2
  %1 = and i16 %0, -32707
  %2 = or i16 %1, 514
  store i16 %2, i16* %y, align 4, !tbaa !2
  ret void
}
# *** IR Dump After X86 DAG->DAG Instruction Selection ***:
# Machine code for function translate: IsSSA, TracksLiveness
Function Live Ins: $rdi in %0

bb.0.entry:
  liveins: $rdi
  %0:gr64 = COPY $rdi
  ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead
$eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
  $rdi = COPY %0:gr64
  CALL64pcrel32 @maybe_mutate, <regmask $bh $bl $bp $bph $bpl $bx $ebp $ebx
$hbp $hbx $rbp $rbx $r12 $r13 $r14 $r15 $r12b $r13b $r14b $r15b $r12bh $r13bh
$r14bh $r15bh $r12d $r13d $r14d $r15d $r12w $r13w $r14w $r15w $r12wh and 3
more...>, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp,
implicit-def $ssp
  ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags,
implicit-def dead $ssp, implicit $rsp, implicit $ssp
  %1:gr32 = MOV32ri -32707
  %2:gr32 = AND32rm %1:gr32(tied-def 0), %0:gr64, 1, $noreg, 4, $noreg,
implicit-def dead $eflags :: (load 2 from %ir.y, align 4, !tbaa !2)
  ^^^ incorrect
  %3:gr32 = ADD32ri_DB %2:gr32(tied-def 0), 514, implicit-def dead $eflags
  %4:gr16 = COPY %3.sub_16bit:gr32
  MOV16mr %0:gr64, 1, $noreg, 4, $noreg, killed %4:gr16 :: (store 2 into %ir.y,
align 4, !tbaa !2)
  RET 0

# End machine code for function translate.
...
```

Building with:

```
clang -O1 -c repro.c -S -o repro.S -c -fno-discard-value-names -mllvm
-debug-only=isel
```

shows the issue being introduced somewhere here:

```
Legalized selection DAG: %bb.0 'translate:entry'
SelectionDAG has 22 nodes:
  t0: ch = EntryToken
  t2: i64,ch = CopyFromReg t0, Register:i64 %0
  t4: i64 = add nuw t2, Constant:i64<4>
  t7: i16,ch = load<(load 2 from %ir.y, align 4, !tbaa !2)> t0, t4, undef:i64
  ^^^ incorrect
    t10: ch,glue = callseq_start t7:1, TargetConstant:i64<0>,
TargetConstant:i64<0>
  t12: ch,glue = CopyToReg t10, Register:i64 $rdi, t2
  t15: ch,glue = X86ISD::CALL t12, TargetGlobalAddress:i64<void (%struct.S*)*
@maybe_mutate> 0, Register:i64 $rdi, RegisterMask:Untyped, t12:1
      t16: ch,glue = callseq_end t15, TargetConstant:i64<0>,
TargetConstant:i64<0>, t15:1
        t18: i16 = and t7, Constant:i16<-32707>
      t20: i16 = or t18, Constant:i16<514>
    t21: ch = store<(store 2 into %ir.y, align 4, !tbaa !2)> t16, t20, t4,
undef:i64
  t23: ch = X86ISD::RET_FLAG t21, TargetConstant:i32<0>


Optimized legalized selection DAG: %bb.0 'translate:entry'
SelectionDAG has 23 nodes:
  t0: ch = EntryToken
  t2: i64,ch = CopyFromReg t0, Register:i64 %0
  t4: i64 = add nuw t2, Constant:i64<4>
    t10: ch,glue = callseq_start t0, TargetConstant:i64<0>,
TargetConstant:i64<0>
  t12: ch,glue = CopyToReg t10, Register:i64 $rdi, t2
  t15: ch,glue = X86ISD::CALL t12, TargetGlobalAddress:i64<void (%struct.S*)*
@maybe_mutate> 0, Register:i64 $rdi, RegisterMask:Untyped, t12:1
      t16: ch,glue = callseq_end t15, TargetConstant:i64<0>,
TargetConstant:i64<0>, t15:1
            t28: i32,ch = load<(load 2 from %ir.y, align 4, !tbaa !2), anyext
from i16> t0, t4, undef:i64
            ^^^ incorrect
          t30: i32 = and t28, Constant:i32<-32707>
        t26: i32 = or t30, Constant:i32<514>
      t27: i16 = truncate t26
    t21: ch = store<(store 2 into %ir.y, align 4, !tbaa !2)> t16, t27, t4,
undef:i64
  t23: ch = X86ISD::RET_FLAG t21, TargetConstant:i32<0>
```

Which implies that the bug is introduced somewhere inside of:
https://github.com/llvm/llvm-project/blob/b28167928d2722e8774da3fce8b3307a0fa27245/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp#L818

Adding some logging to dump the DAG in DAGCombiner, I believe I've narrowed it
down to this code:

https://github.com/llvm/llvm-project/blob/b28167928d2722e8774da3fce8b3307a0fa27245/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L1144-L1146

Here, we're promoting a load from of an i16 to a load of an i32. When we create
the new load node, we set its chain to that of the original load. This ensures
that the new load happens after anything that the original load happens after.
However, it does not ensure that things which happen after the original load
also happen after the new load - in particular, the callseq_start for the call
to `maybe_mutate`.

Replacing:

```
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
```

with:


```
    SDValue NewLoad = DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
    DAG.makeEquivalentMemoryOrdering(LD, NewLoad);
    return NewLoad;
```

fixes the issue for me. If the above sounds reasonable to folks, I'd be happy
to whip up a patch/test for this.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200604/9e1a4136/attachment-0001.html>


More information about the llvm-bugs mailing list