[llvm] r238054 - [AArch64][CGP] Sink zext feeding stxr/stlxr into the same block.

Fri Jun 12 17:59:41 PDT 2015

Hello Ahmed,

This commit causes llc to hang while compiling the attached IR file.
To reproduce the hang, just run:

    llc -O1 aarch64-hang.ll

llc won't finish until you kill it.

Regards,
Sergey

On Fri, May 22, 2015 at 02:37:17PM -0700, Ahmed Bougacha wrote:
> Author: ab
> Date: Fri May 22 16:37:17 2015
> New Revision: 238054
>
> URL: http://llvm.org/viewvc/llvm-project?rev=238054&view=rev
> Log:
> [AArch64][CGP] Sink zext feeding stxr/stlxr into the same block.
>
> The usual CodeGenPrepare trickery, on a target-specific intrinsic.
> Without this, the expansion of atomics will usually have the zext
> be hoisted out of the loop, defeating the various patterns we have
> to catch this precise case.
>
> Differential Revision: http://reviews.llvm.org/D9930
>
> Modified:
>     llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
>     llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll
>
> Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=238054&r1=238053&r2=238054&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
> +++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Fri May 22 16:37:17 2015
> @@ -1397,6 +1397,16 @@ bool CodeGenPrepare::OptimizeCallInst(Ca
>        }
>        return false;
>      }
> +    case Intrinsic::aarch64_stlxr:
> +    case Intrinsic::aarch64_stxr: {
> +      ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
> +      if (!ExtVal || !ExtVal->hasOneUse() ||
> +          ExtVal->getParent() == CI->getParent())
> +        return false;
> +      // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
> +      ExtVal->moveBefore(CI);
> +      return true;
> +    }
>      }
>
>      if (TLI) {
>
> Modified: llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll?rev=238054&r1=238053&r2=238054&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-atomic.ll Fri May 22 16:37:17 2015
> @@ -2,12 +2,11 @@
>
>  define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
>  ; CHECK-LABEL: val_compare_and_swap:
> -; CHECK-NEXT: ubfx   x[[NEWVAL_REG:[0-9]+]], x2, #0, #32
>  ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
>  ; CHECK-NEXT: ldaxr  [[RESULT:w[0-9]+]], [x0]
>  ; CHECK-NEXT: cmp    [[RESULT]], w1
>  ; CHECK-NEXT: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
> -; CHECK-NEXT: stxr   [[SCRATCH_REG:w[0-9]+]], w[[NEWVAL_REG]], [x0]
> +; CHECK-NEXT: stxr   [[SCRATCH_REG:w[0-9]+]], w2, [x0]
>  ; CHECK-NEXT: cbnz   [[SCRATCH_REG]], [[LABEL]]
>  ; CHECK-NEXT: [[LABEL2]]:
>    %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
> @@ -17,12 +16,11 @@ define i32 @val_compare_and_swap(i32* %p
>
>  define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
>  ; CHECK-LABEL: val_compare_and_swap_rel:
> -; CHECK-NEXT: ubfx   x[[NEWVAL_REG:[0-9]+]], x2, #0, #32
>  ; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
>  ; CHECK-NEXT: ldaxr  [[RESULT:w[0-9]+]], [x0]
>  ; CHECK-NEXT: cmp    [[RESULT]], w1
>  ; CHECK-NEXT: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
> -; CHECK-NEXT: stlxr  [[SCRATCH_REG:w[0-9]+]], w[[NEWVAL_REG]], [x0]
> +; CHECK-NEXT: stlxr  [[SCRATCH_REG:w[0-9]+]], w2, [x0]
>  ; CHECK-NEXT: cbnz   [[SCRATCH_REG]], [[LABEL]]
>  ; CHECK-NEXT: [[LABEL2]]:
>    %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
-------------- next part --------------
; Reproducer for the llc hang reported against r238054.
; Run: llc -O1 aarch64-hang.ll
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-elf"

; 32-bit global, initially 0; it is the memory operand of the cmpxchg in
; @_Z4doitv.
@h = global i32 0, align 4

; Loads an i32 through the pointer returned by @_Z3getv and attempts to store
; it into @h with a seq_cst compare-and-swap that expects @h to hold 0.
; Returns the i1 success flag of that cmpxchg.
define i1 @_Z4doitv() {
entry:
  %call = tail call i32* @_Z3getv()
  ; The cmpxchg's new value comes directly from this load.
  %0 = load i32, i32* %call, align 4
  ; NOTE(review): presumably the AArch64 expansion of this cmpxchg yields the
  ; zext-feeding-stxr/stlxr pattern that r238054 sinks, and the zext of a
  ; loaded value is what triggers the non-termination -- confirm.
  %1 = cmpxchg i32* @h, i32 0, i32 %0 seq_cst seq_cst
  ; Field 1 of the { i32, i1 } result is the success bit.
  %2 = extractvalue { i32, i1 } %1, 1
  ret i1 %2
}

; External function (no body in this module) returning an i32*; @_Z4doitv
; loads the cmpxchg's new value through the returned pointer.
declare i32* @_Z3getv()