[PATCH] D46396: Optionally simplify basic blocks introduced by AtomicExpandPass

Fri May 4 12:51:38 PDT 2018

kparzysz added inline comments.

================
Comment at: lib/Target/AArch64/AArch64TargetMachine.cpp:376
   if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
     addPass(createCFGSimplificationPass(1, true, true, false, true));

----------------
kparzysz wrote:
> ab wrote:
> > This already does the simplification, no?  How about adding the pass for Hexagon as well?  I suppose this patch limits it to a smaller set of blocks, but on the other hand it's nice not to have to deal with it in the pass.
> Evidently it doesn't do a good enough job---check the testcase.  On Hexagon we don't want to run the full simplify-cfg.  I've tried that and it broke over 100 lit tests, and I don't even know what impact it would have on performance.  This would be just way too big of a hammer.
Let me attach the testcase outputs (I somehow thought it was more evident in the testcase itself).

Without this patch:

```
f0:                                     // @f0
        .cfi_startproc
// %bb.0:                               // %b0
        ldr     w8, [x0]
        add     w9, w8, #1              // =1
        cmp     w9, #17                 // =17
        csinc   w9, wzr, w8, eq
.LBB0_1:                                // %cmpxchg.start
                                        // =>This Inner Loop Header: Depth=1
        ldaxr   w10, [x0]
        cmp     w10, w8
        b.ne    .LBB0_4
// %bb.2:                               // %cmpxchg.trystore
                                        //   in Loop: Header=BB0_1 Depth=1
        stlxr   w10, w9, [x0]
        cbnz    w10, .LBB0_1
// %bb.3:
        orr     w8, wzr, #0x1
        b       .LBB0_5
.LBB0_4:                                // %cmpxchg.nostore
        clrex
        mov     w8, wzr
.LBB0_5:                                // %cmpxchg.end
        cmp     w8, #0                  // =0
        mov     w8, #123
        mov     w9, #321
        csel    w0, w9, w8, ne          // *** The patch eliminates this
                                        // *** select (and the setup code).
        ret
.Lfunc_end0:
        .size   f0, .Lfunc_end0-f0
        .cfi_endproc
                                        // -- End function
```

With the patch:

```
f0:                                     // @f0
        .cfi_startproc
// %bb.0:                               // %b0
        ldr     w8, [x0]
        add     w9, w8, #1              // =1
        cmp     w9, #17                 // =17
        csinc   w9, wzr, w8, eq
.LBB0_1:                                // %cmpxchg.start
                                        // =>This Inner Loop Header: Depth=1
        ldaxr   w10, [x0]
        cmp     w10, w8
        b.ne    .LBB0_4
// %bb.2:                               // %cmpxchg.fencedstore
                                        //   in Loop: Header=BB0_1 Depth=1
        stlxr   w10, w9, [x0]
        cbnz    w10, .LBB0_1
// %bb.3:
        mov     w0, #321
        ret
.LBB0_4:                                // %cmpxchg.nostore
        clrex
        mov     w0, #123
        ret
.Lfunc_end0:
        .size   f0, .Lfunc_end0-f0
        .cfi_endproc
                                        // -- End function
```

Repository:
  rL LLVM

https://reviews.llvm.org/D46396